aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2010-09-17 15:48:55 +0000
committerDimitry Andric <dim@FreeBSD.org>2010-09-17 15:48:55 +0000
commitd39c594d39df7f283c2fb8a704a3f31c501180d9 (patch)
tree36453626c792cccd91f783a38a169d610a6b9db9
parent6144c1de6a7674dad94290650e4e14f24d42e421 (diff)
downloadsrc-d39c594d39df7f283c2fb8a704a3f31c501180d9.tar.gz
src-d39c594d39df7f283c2fb8a704a3f31c501180d9.zip
Vendor import of llvm r114020 (from the release_28 branch):vendor/llvm/llvm-r114020
Notes
Notes: svn path=/vendor/llvm/dist/; revision=212793 svn path=/vendor/llvm/llvm-r114020/; revision=212794; tag=vendor/llvm/llvm-r114020
-rw-r--r--CMakeLists.txt44
-rw-r--r--CREDITS.TXT32
-rw-r--r--Makefile14
-rw-r--r--Makefile.config.in18
-rw-r--r--Makefile.rules264
-rw-r--r--README.txt3
-rw-r--r--autoconf/configure.ac86
-rw-r--r--autoconf/m4/link_options.m421
-rw-r--r--bindings/ada/llvm/llvm.ads36
-rw-r--r--bindings/ocaml/llvm/llvm.ml12
-rw-r--r--bindings/ocaml/llvm/llvm.mli28
-rw-r--r--bindings/ocaml/llvm/llvm_ocaml.c30
-rwxr-xr-xcmake/config-ix.cmake10
-rw-r--r--cmake/modules/CMakeLists.txt12
-rw-r--r--cmake/modules/ChooseMSVCCRT.cmake106
-rw-r--r--cmake/modules/LLVM.cmake29
-rwxr-xr-xcmake/modules/LLVMConfig.cmake48
-rw-r--r--cmake/modules/LLVMLibDeps.cmake33
-rw-r--r--cmake/modules/VersionFromVCS.cmake33
-rwxr-xr-xconfigure155
-rw-r--r--docs/AliasAnalysis.html10
-rw-r--r--docs/BitCodeFormat.html17
-rw-r--r--docs/CMake.html45
-rw-r--r--docs/CodeGenerator.html6
-rw-r--r--docs/CommandGuide/bugpoint.pod4
-rw-r--r--docs/CommandGuide/index.html5
-rw-r--r--docs/CommandGuide/llvm-diff.pod53
-rw-r--r--docs/DeveloperPolicy.html54
-rw-r--r--docs/GCCFEBuildInstrs.html16
-rw-r--r--docs/GetElementPtr.html13
-rw-r--r--docs/GoldPlugin.html8
-rw-r--r--docs/LangRef.html129
-rw-r--r--docs/MakefileGuide.html5
-rw-r--r--docs/Passes.html49
-rw-r--r--docs/ProgrammersManual.html6
-rw-r--r--docs/ReleaseNotes.html598
-rw-r--r--docs/TestingGuide.html223
-rw-r--r--docs/WritingAnLLVMBackend.html5
-rw-r--r--docs/WritingAnLLVMPass.html59
-rw-r--r--docs/tutorial/LangImpl3.html10
-rw-r--r--docs/tutorial/LangImpl5.html6
-rw-r--r--docs/tutorial/LangImpl6.html4
-rw-r--r--docs/tutorial/LangImpl7.html6
-rw-r--r--examples/BrainF/BrainF.cpp11
-rw-r--r--examples/Fibonacci/fibonacci.cpp14
-rw-r--r--examples/Makefile9
-rw-r--r--include/llvm-c/Core.h20
-rw-r--r--include/llvm-c/EnhancedDisassembly.h17
-rw-r--r--include/llvm-c/ExecutionEngine.h2
-rw-r--r--include/llvm-c/Target.h47
-rw-r--r--include/llvm-c/lto.h24
-rw-r--r--include/llvm/ADT/APInt.h4
-rw-r--r--include/llvm/ADT/DenseMap.h3
-rw-r--r--include/llvm/ADT/DenseSet.h6
-rw-r--r--include/llvm/ADT/DepthFirstIterator.h10
-rw-r--r--include/llvm/ADT/FoldingSet.h177
-rw-r--r--include/llvm/ADT/ImmutableIntervalMap.h10
-rw-r--r--include/llvm/ADT/NullablePtr.h52
-rw-r--r--include/llvm/ADT/STLExtras.h4
-rw-r--r--include/llvm/ADT/ScopedHashTable.h7
-rw-r--r--include/llvm/ADT/SmallVector.h32
-rw-r--r--include/llvm/ADT/StringMap.h4
-rw-r--r--include/llvm/ADT/StringRef.h13
-rw-r--r--include/llvm/ADT/StringSet.h7
-rw-r--r--include/llvm/ADT/StringSwitch.h20
-rw-r--r--include/llvm/ADT/Triple.h32
-rw-r--r--include/llvm/ADT/ValueMap.h18
-rw-r--r--include/llvm/ADT/ilist.h1
-rw-r--r--include/llvm/Analysis/AliasAnalysis.h109
-rw-r--r--include/llvm/Analysis/AliasSetTracker.h10
-rw-r--r--include/llvm/Analysis/DOTGraphTraitsPass.h4
-rw-r--r--include/llvm/Analysis/DebugInfo.h61
-rw-r--r--include/llvm/Analysis/Dominators.h9
-rw-r--r--include/llvm/Analysis/FindUsedTypes.h2
-rw-r--r--include/llvm/Analysis/IntervalPartition.h2
-rw-r--r--include/llvm/Analysis/LazyValueInfo.h12
-rw-r--r--include/llvm/Analysis/LibCallAliasAnalysis.h25
-rw-r--r--include/llvm/Analysis/LibCallSemantics.h5
-rw-r--r--include/llvm/Analysis/LoopDependenceAnalysis.h2
-rw-r--r--include/llvm/Analysis/LoopInfo.h26
-rw-r--r--include/llvm/Analysis/LoopPass.h12
-rw-r--r--include/llvm/Analysis/Passes.h18
-rw-r--r--include/llvm/Analysis/PointerTracking.h1
-rw-r--r--include/llvm/Analysis/PostDominators.h4
-rw-r--r--include/llvm/Analysis/RegionInfo.h630
-rw-r--r--include/llvm/Analysis/RegionIterator.h342
-rw-r--r--include/llvm/Analysis/RegionPrinter.h26
-rw-r--r--include/llvm/Analysis/ScalarEvolution.h47
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpander.h14
-rw-r--r--include/llvm/Analysis/ScalarEvolutionExpressions.h48
-rw-r--r--include/llvm/Analysis/ValueTracking.h19
-rw-r--r--include/llvm/Assembly/AssemblyAnnotationWriter.h (renamed from include/llvm/Assembly/AsmAnnotationWriter.h)17
-rw-r--r--include/llvm/AutoUpgrade.h5
-rw-r--r--include/llvm/Bitcode/Archive.h6
-rw-r--r--include/llvm/Bitcode/BitstreamWriter.h8
-rw-r--r--include/llvm/Bitcode/LLVMBitCodes.h31
-rw-r--r--include/llvm/CallGraphSCCPass.h5
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h13
-rw-r--r--include/llvm/CodeGen/CalcSpillWeights.h29
-rw-r--r--include/llvm/CodeGen/CallingConvLower.h6
-rw-r--r--include/llvm/CodeGen/FunctionLoweringInfo.h10
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h9
-rw-r--r--include/llvm/CodeGen/LiveInterval.h37
-rw-r--r--include/llvm/CodeGen/LiveIntervalAnalysis.h48
-rw-r--r--include/llvm/CodeGen/LiveStackAnalysis.h2
-rw-r--r--include/llvm/CodeGen/LiveVariables.h2
-rw-r--r--include/llvm/CodeGen/MachineFrameInfo.h167
-rw-r--r--include/llvm/CodeGen/MachineFunction.h2
-rw-r--r--include/llvm/CodeGen/MachineFunctionPass.h3
-rw-r--r--include/llvm/CodeGen/MachineInstr.h6
-rw-r--r--include/llvm/CodeGen/MachineLoopInfo.h2
-rw-r--r--include/llvm/CodeGen/MachineModuleInfo.h2
-rw-r--r--include/llvm/CodeGen/Passes.h62
-rw-r--r--include/llvm/CodeGen/ProcessImplicitDefs.h2
-rw-r--r--include/llvm/CodeGen/SchedulerRegistry.h13
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h4
-rw-r--r--include/llvm/CodeGen/SlotIndexes.h45
-rw-r--r--include/llvm/CodeGen/TargetLoweringObjectFileImpl.h1
-rw-r--r--include/llvm/CodeGen/ValueTypes.h110
-rw-r--r--include/llvm/CompilerDriver/Action.h16
-rw-r--r--include/llvm/CompilerDriver/AutoGenerated.h40
-rw-r--r--include/llvm/CompilerDriver/BuiltinOptions.h4
-rw-r--r--include/llvm/CompilerDriver/Common.td41
-rw-r--r--include/llvm/CompilerDriver/CompilationGraph.h95
-rw-r--r--include/llvm/CompilerDriver/Error.h20
-rw-r--r--include/llvm/CompilerDriver/ForceLinkage.h122
-rw-r--r--include/llvm/CompilerDriver/ForceLinkageMacros.h29
-rw-r--r--include/llvm/CompilerDriver/Main.h21
-rw-r--r--include/llvm/CompilerDriver/Main.inc14
-rw-r--r--include/llvm/CompilerDriver/Plugin.h81
-rw-r--r--include/llvm/CompilerDriver/Tool.h40
-rw-r--r--include/llvm/Config/config.h.cmake20
-rw-r--r--include/llvm/Config/config.h.in17
-rw-r--r--include/llvm/Config/llvm-config.h.cmake97
-rw-r--r--include/llvm/Config/llvm-config.h.in97
-rw-r--r--include/llvm/Constants.h44
-rw-r--r--include/llvm/DerivedTypes.h66
-rw-r--r--include/llvm/ExecutionEngine/JITMemoryManager.h9
-rw-r--r--include/llvm/GlobalValue.h46
-rw-r--r--include/llvm/Instruction.h12
-rw-r--r--include/llvm/Instructions.h96
-rw-r--r--include/llvm/IntrinsicInst.h16
-rw-r--r--include/llvm/Intrinsics.td69
-rw-r--r--include/llvm/IntrinsicsARM.td119
-rw-r--r--include/llvm/IntrinsicsPowerPC.td47
-rw-r--r--include/llvm/IntrinsicsX86.td482
-rw-r--r--include/llvm/LLVMContext.h5
-rw-r--r--include/llvm/LinkAllPasses.h12
-rw-r--r--include/llvm/LinkAllVMCore.h1
-rw-r--r--include/llvm/Linker.h1
-rw-r--r--include/llvm/MC/ELFObjectWriter.h46
-rw-r--r--include/llvm/MC/MCAsmInfo.h14
-rw-r--r--include/llvm/MC/MCAssembler.h23
-rw-r--r--include/llvm/MC/MCContext.h60
-rw-r--r--include/llvm/MC/MCDwarf.h156
-rw-r--r--include/llvm/MC/MCELFSymbolFlags.h54
-rw-r--r--include/llvm/MC/MCObjectStreamer.h11
-rw-r--r--include/llvm/MC/MCObjectWriter.h2
-rw-r--r--include/llvm/MC/MCParser/AsmParser.h152
-rw-r--r--include/llvm/MC/MCParser/MCAsmParser.h30
-rw-r--r--include/llvm/MC/MCParser/MCAsmParserExtension.h18
-rw-r--r--include/llvm/MC/MCParser/MCParsedAsmOperand.h8
-rw-r--r--include/llvm/MC/MCSectionELF.h10
-rw-r--r--include/llvm/MC/MCStreamer.h30
-rw-r--r--include/llvm/Metadata.h54
-rw-r--r--include/llvm/Module.h29
-rw-r--r--include/llvm/Pass.h71
-rw-r--r--include/llvm/PassAnalysisSupport.h50
-rw-r--r--include/llvm/PassManager.h1
-rw-r--r--include/llvm/PassManagers.h58
-rw-r--r--include/llvm/PassRegistry.h71
-rw-r--r--include/llvm/PassSupport.h41
-rw-r--r--include/llvm/Support/COFF.h87
-rw-r--r--include/llvm/Support/CallSite.h63
-rw-r--r--include/llvm/Support/Casting.h67
-rw-r--r--include/llvm/Support/CommandLine.h27
-rw-r--r--include/llvm/Support/Compiler.h5
-rw-r--r--include/llvm/Support/ConstantRange.h38
-rw-r--r--include/llvm/Support/CrashRecoveryContext.h84
-rw-r--r--include/llvm/Support/DataFlow.h2
-rw-r--r--include/llvm/Support/ELF.h83
-rw-r--r--include/llvm/Support/ErrorHandling.h12
-rw-r--r--include/llvm/Support/GraphWriter.h12
-rw-r--r--include/llvm/Support/IRBuilder.h20
-rw-r--r--include/llvm/Support/IRReader.h10
-rw-r--r--include/llvm/Support/MachO.h638
-rw-r--r--include/llvm/Support/MathExtras.h6
-rw-r--r--include/llvm/Support/PassNameParser.h24
-rw-r--r--include/llvm/Support/PatternMatch.h7
-rw-r--r--include/llvm/Support/PrettyStackTrace.h14
-rw-r--r--include/llvm/Support/Regex.h15
-rw-r--r--include/llvm/Support/Registry.h2
-rw-r--r--include/llvm/Support/SlowOperationInformer.h65
-rw-r--r--include/llvm/Support/SourceMgr.h4
-rw-r--r--include/llvm/Support/StandardPasses.h3
-rw-r--r--include/llvm/Support/SystemUtils.h7
-rw-r--r--include/llvm/Support/TypeBuilder.h2
-rw-r--r--include/llvm/Support/raw_ostream.h125
-rw-r--r--include/llvm/System/Memory.h1
-rw-r--r--include/llvm/System/Path.h5
-rw-r--r--include/llvm/System/Process.h2
-rw-r--r--include/llvm/System/Program.h3
-rw-r--r--include/llvm/System/Signals.h4
-rw-r--r--include/llvm/System/ThreadLocal.h13
-rw-r--r--include/llvm/Target/Target.td7
-rw-r--r--include/llvm/Target/TargetAsmParser.h14
-rw-r--r--include/llvm/Target/TargetCallingConv.td9
-rw-r--r--include/llvm/Target/TargetData.h42
-rw-r--r--include/llvm/Target/TargetInstrDesc.h14
-rw-r--r--include/llvm/Target/TargetInstrInfo.h53
-rw-r--r--include/llvm/Target/TargetLowering.h140
-rw-r--r--include/llvm/Target/TargetMachine.h38
-rw-r--r--include/llvm/Target/TargetOpcodes.h2
-rw-r--r--include/llvm/Target/TargetOptions.h23
-rw-r--r--include/llvm/Target/TargetRegisterInfo.h67
-rw-r--r--include/llvm/Target/TargetRegistry.h17
-rw-r--r--include/llvm/Target/TargetSelect.h25
-rw-r--r--include/llvm/Transforms/IPO.h5
-rw-r--r--include/llvm/Transforms/IPO/InlinerPass.h4
-rw-r--r--include/llvm/Transforms/Scalar.h39
-rw-r--r--include/llvm/Transforms/Utils/Cloning.h15
-rw-r--r--include/llvm/Transforms/Utils/Local.h2
-rw-r--r--include/llvm/Transforms/Utils/SSAUpdater.h18
-rw-r--r--include/llvm/Transforms/Utils/SSI.h93
-rw-r--r--include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h2
-rw-r--r--include/llvm/Transforms/Utils/ValueMapper.h (renamed from lib/Transforms/Utils/ValueMapper.h)12
-rw-r--r--include/llvm/Type.h18
-rw-r--r--include/llvm/Use.h24
-rw-r--r--include/llvm/Value.h6
-rw-r--r--include/llvm/ValueSymbolTable.h88
-rw-r--r--lib/Analysis/AliasAnalysis.cpp233
-rw-r--r--lib/Analysis/AliasAnalysisCounter.cpp20
-rw-r--r--lib/Analysis/AliasAnalysisEvaluator.cpp50
-rw-r--r--lib/Analysis/AliasDebugger.cpp17
-rw-r--r--lib/Analysis/AliasSetTracker.cpp157
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp584
-rw-r--r--lib/Analysis/CFGPrinter.cpp30
-rw-r--r--lib/Analysis/CMakeLists.txt3
-rw-r--r--lib/Analysis/CaptureTracking.cpp2
-rw-r--r--lib/Analysis/ConstantFolding.cpp4
-rw-r--r--lib/Analysis/DbgInfoPrinter.cpp6
-rw-r--r--lib/Analysis/DebugInfo.cpp192
-rw-r--r--lib/Analysis/DomPrinter.cpp65
-rw-r--r--lib/Analysis/IPA/CallGraph.cpp15
-rw-r--r--lib/Analysis/IPA/CallGraphSCCPass.cpp12
-rw-r--r--lib/Analysis/IPA/FindUsedTypes.cpp4
-rw-r--r--lib/Analysis/IPA/GlobalsModRef.cpp81
-rw-r--r--lib/Analysis/IVUsers.cpp39
-rw-r--r--lib/Analysis/InlineCost.cpp8
-rw-r--r--lib/Analysis/InstCount.cpp6
-rw-r--r--lib/Analysis/IntervalPartition.cpp6
-rw-r--r--lib/Analysis/LazyValueInfo.cpp583
-rw-r--r--lib/Analysis/LibCallAliasAnalysis.cpp14
-rw-r--r--lib/Analysis/LibCallSemantics.cpp3
-rw-r--r--lib/Analysis/Lint.cpp11
-rw-r--r--lib/Analysis/LiveValues.cpp6
-rw-r--r--lib/Analysis/LoopDependenceAnalysis.cpp4
-rw-r--r--lib/Analysis/LoopInfo.cpp45
-rw-r--r--lib/Analysis/LoopPass.cpp16
-rw-r--r--lib/Analysis/MemoryDependenceAnalysis.cpp44
-rw-r--r--lib/Analysis/ModuleDebugInfoPrinter.cpp7
-rw-r--r--lib/Analysis/PointerTracking.cpp55
-rw-r--r--lib/Analysis/PostDominators.cpp8
-rw-r--r--lib/Analysis/ProfileEstimatorPass.cpp14
-rw-r--r--lib/Analysis/ProfileInfo.cpp13
-rw-r--r--lib/Analysis/ProfileInfoLoaderPass.cpp14
-rw-r--r--lib/Analysis/ProfileVerifierPass.cpp8
-rw-r--r--lib/Analysis/RegionInfo.cpp749
-rw-r--r--lib/Analysis/RegionPrinter.cpp186
-rw-r--r--lib/Analysis/ScalarEvolution.cpp743
-rw-r--r--lib/Analysis/ScalarEvolutionAliasAnalysis.cpp17
-rw-r--r--lib/Analysis/ScalarEvolutionExpander.cpp74
-rw-r--r--lib/Analysis/ScalarEvolutionNormalization.cpp114
-rw-r--r--lib/Analysis/TypeBasedAliasAnalysis.cpp191
-rw-r--r--lib/Analysis/ValueTracking.cpp238
-rw-r--r--lib/AsmParser/LLLexer.cpp2
-rw-r--r--lib/AsmParser/LLParser.cpp161
-rw-r--r--lib/AsmParser/LLParser.h14
-rw-r--r--lib/AsmParser/LLToken.h4
-rw-r--r--lib/AsmParser/Parser.cpp3
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp119
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.h12
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp136
-rw-r--r--lib/Bitcode/Writer/BitcodeWriterPass.cpp2
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.cpp139
-rw-r--r--lib/Bitcode/Writer/ValueEnumerator.h9
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.cpp32
-rw-r--r--lib/CodeGen/AggressiveAntiDepBreaker.h10
-rw-r--r--lib/CodeGen/Analysis.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp88
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp4
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp13
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp862
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h28
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfException.cpp2
-rw-r--r--lib/CodeGen/BranchFolding.cpp2
-rw-r--r--lib/CodeGen/CMakeLists.txt6
-rw-r--r--lib/CodeGen/CalcSpillWeights.cpp276
-rw-r--r--lib/CodeGen/CodePlacementOpt.cpp2
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.cpp27
-rw-r--r--lib/CodeGen/CriticalAntiDepBreaker.h7
-rw-r--r--lib/CodeGen/DeadMachineInstructionElim.cpp21
-rw-r--r--lib/CodeGen/DwarfEHPrepare.cpp282
-rw-r--r--lib/CodeGen/ELF.h120
-rw-r--r--lib/CodeGen/ELFCodeEmitter.cpp2
-rw-r--r--lib/CodeGen/ELFWriter.cpp30
-rw-r--r--lib/CodeGen/ELFWriter.h13
-rw-r--r--lib/CodeGen/GCMetadata.cpp12
-rw-r--r--lib/CodeGen/GCStrategy.cpp9
-rw-r--r--lib/CodeGen/IfConversion.cpp5
-rw-r--r--lib/CodeGen/InlineSpiller.cpp118
-rw-r--r--lib/CodeGen/IntrinsicLowering.cpp3
-rw-r--r--lib/CodeGen/LLVMTargetMachine.cpp81
-rw-r--r--lib/CodeGen/LiveInterval.cpp164
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp79
-rw-r--r--lib/CodeGen/LiveStackAnalysis.cpp3
-rw-r--r--lib/CodeGen/LiveVariables.cpp18
-rw-r--r--lib/CodeGen/LocalStackSlotAllocation.cpp354
-rw-r--r--lib/CodeGen/LowerSubregs.cpp22
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp32
-rw-r--r--lib/CodeGen/MachineCSE.cpp41
-rw-r--r--lib/CodeGen/MachineDominators.cpp8
-rw-r--r--lib/CodeGen/MachineFunction.cpp3
-rw-r--r--lib/CodeGen/MachineFunctionAnalysis.cpp4
-rw-r--r--lib/CodeGen/MachineFunctionPrinterPass.cpp2
-rw-r--r--lib/CodeGen/MachineInstr.cpp33
-rw-r--r--lib/CodeGen/MachineLICM.cpp21
-rw-r--r--lib/CodeGen/MachineLoopInfo.cpp6
-rw-r--r--lib/CodeGen/MachineModuleInfo.cpp15
-rw-r--r--lib/CodeGen/MachineSink.cpp138
-rw-r--r--lib/CodeGen/MachineVerifier.cpp230
-rw-r--r--lib/CodeGen/OptimizeExts.cpp220
-rw-r--r--lib/CodeGen/OptimizePHIs.cpp20
-rw-r--r--lib/CodeGen/PBQP/HeuristicBase.h8
-rw-r--r--lib/CodeGen/PBQP/HeuristicSolver.h9
-rw-r--r--lib/CodeGen/PBQP/Heuristics/Briggs.h8
-rw-r--r--lib/CodeGen/PBQP/Solution.h31
-rw-r--r--lib/CodeGen/PHIElimination.cpp36
-rw-r--r--lib/CodeGen/PHIElimination.h10
-rw-r--r--lib/CodeGen/PeepholeOptimizer.cpp287
-rw-r--r--lib/CodeGen/PostRASchedulerList.cpp7
-rw-r--r--lib/CodeGen/PreAllocSplitting.cpp15
-rw-r--r--lib/CodeGen/ProcessImplicitDefs.cpp18
-rw-r--r--lib/CodeGen/PrologEpilogInserter.cpp243
-rw-r--r--lib/CodeGen/PrologEpilogInserter.h9
-rw-r--r--lib/CodeGen/RegAllocFast.cpp136
-rw-r--r--lib/CodeGen/RegAllocLinearScan.cpp154
-rw-r--r--lib/CodeGen/RegAllocPBQP.cpp47
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp3
-rw-r--r--lib/CodeGen/RegisterScavenging.cpp34
-rw-r--r--lib/CodeGen/RenderMachineFunction.cpp1014
-rw-r--r--lib/CodeGen/RenderMachineFunction.h336
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp9
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.h4
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp15
-rw-r--r--lib/CodeGen/SelectionDAG/FastISel.cpp17
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp67
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp26
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp20
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h24
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp390
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp74
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp421
-rw-r--r--lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp5
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp40
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp638
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h37
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp44
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/TargetLowering.cpp182
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.cpp161
-rw-r--r--lib/CodeGen/SimpleRegisterCoalescing.h3
-rw-r--r--lib/CodeGen/SjLjEHPrepare.cpp2
-rw-r--r--lib/CodeGen/SlotIndexes.cpp3
-rw-r--r--lib/CodeGen/Spiller.cpp98
-rw-r--r--lib/CodeGen/Spiller.h19
-rw-r--r--lib/CodeGen/SplitKit.cpp1097
-rw-r--r--lib/CodeGen/SplitKit.h321
-rw-r--r--lib/CodeGen/Splitter.cpp817
-rw-r--r--lib/CodeGen/Splitter.h99
-rw-r--r--lib/CodeGen/StackProtector.cpp8
-rw-r--r--lib/CodeGen/StackSlotColoring.cpp9
-rw-r--r--lib/CodeGen/StrongPHIElimination.cpp9
-rw-r--r--lib/CodeGen/TailDuplication.cpp19
-rw-r--r--lib/CodeGen/TargetInstrInfoImpl.cpp13
-rw-r--r--lib/CodeGen/TargetLoweringObjectFileImpl.cpp6
-rw-r--r--lib/CodeGen/TwoAddressInstructionPass.cpp50
-rw-r--r--lib/CodeGen/UnreachableBlockElim.cpp15
-rw-r--r--lib/CodeGen/VirtRegMap.cpp3
-rw-r--r--lib/CodeGen/VirtRegMap.h7
-rw-r--r--lib/CodeGen/VirtRegRewriter.cpp103
-rw-r--r--lib/CompilerDriver/Action.cpp13
-rw-r--r--lib/CompilerDriver/BuiltinOptions.cpp4
-rw-r--r--lib/CompilerDriver/CompilationGraph.cpp289
-rw-r--r--lib/CompilerDriver/Main.cpp123
-rw-r--r--lib/CompilerDriver/Makefile36
-rw-r--r--lib/CompilerDriver/Plugin.cpp78
-rw-r--r--lib/ExecutionEngine/ExecutionEngineBindings.cpp4
-rw-r--r--lib/ExecutionEngine/JIT/Intercept.cpp12
-rw-r--r--lib/ExecutionEngine/JIT/JIT.cpp10
-rw-r--r--lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp10
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp440
-rw-r--r--lib/ExecutionEngine/JIT/JITDwarfEmitter.h16
-rw-r--r--lib/ExecutionEngine/JIT/JITEmitter.cpp307
-rw-r--r--lib/Linker/LinkModules.cpp180
-rw-r--r--lib/MC/CMakeLists.txt3
-rw-r--r--lib/MC/ELFObjectWriter.cpp973
-rw-r--r--lib/MC/MCAsmInfo.cpp2
-rw-r--r--lib/MC/MCAsmInfoDarwin.cpp3
-rw-r--r--lib/MC/MCAsmStreamer.cpp3
-rw-r--r--lib/MC/MCAssembler.cpp74
-rw-r--r--lib/MC/MCContext.cpp88
-rw-r--r--lib/MC/MCDisassembler/CMakeLists.txt7
-rw-r--r--lib/MC/MCDisassembler/EDDisassembler.cpp (renamed from tools/edis/EDDisassembler.cpp)43
-rw-r--r--lib/MC/MCDisassembler/EDDisassembler.h (renamed from tools/edis/EDDisassembler.h)37
-rw-r--r--lib/MC/MCDisassembler/EDInfo.h73
-rw-r--r--lib/MC/MCDisassembler/EDInst.cpp (renamed from tools/edis/EDInst.cpp)2
-rw-r--r--lib/MC/MCDisassembler/EDInst.h (renamed from tools/edis/EDInst.h)21
-rw-r--r--lib/MC/MCDisassembler/EDOperand.cpp (renamed from tools/edis/EDOperand.cpp)8
-rw-r--r--lib/MC/MCDisassembler/EDOperand.h (renamed from tools/edis/EDOperand.h)19
-rw-r--r--lib/MC/MCDisassembler/EDToken.cpp (renamed from tools/edis/EDToken.cpp)8
-rw-r--r--lib/MC/MCDisassembler/EDToken.h (renamed from tools/edis/EDToken.h)12
-rw-r--r--lib/MC/MCDisassembler/Makefile14
-rw-r--r--lib/MC/MCDwarf.cpp21
-rw-r--r--lib/MC/MCELFStreamer.cpp408
-rw-r--r--lib/MC/MCMachOStreamer.cpp277
-rw-r--r--lib/MC/MCNullStreamer.cpp1
-rw-r--r--lib/MC/MCObjectStreamer.cpp48
-rw-r--r--lib/MC/MCParser/AsmLexer.cpp28
-rw-r--r--lib/MC/MCParser/AsmParser.cpp700
-rw-r--r--lib/MC/MCParser/DarwinAsmParser.cpp237
-rw-r--r--lib/MC/MCParser/ELFAsmParser.cpp235
-rw-r--r--lib/MC/MCParser/MCAsmParser.cpp15
-rw-r--r--lib/MC/MCParser/TargetAsmParser.cpp2
-rw-r--r--lib/MC/MCStreamer.cpp3
-rw-r--r--lib/MC/MachObjectWriter.cpp2
-rw-r--r--lib/MC/Makefile2
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp737
-rw-r--r--lib/MC/WinCOFFStreamer.cpp229
-rw-r--r--lib/Support/APFloat.cpp1
-rw-r--r--lib/Support/APInt.cpp13
-rw-r--r--lib/Support/CMakeLists.txt2
-rw-r--r--lib/Support/ConstantRange.cpp124
-rw-r--r--lib/Support/CrashRecoveryContext.cpp204
-rw-r--r--lib/Support/ErrorHandling.cpp34
-rw-r--r--lib/Support/FoldingSet.cpp89
-rw-r--r--lib/Support/PrettyStackTrace.cpp5
-rw-r--r--lib/Support/SlowOperationInformer.cpp67
-rw-r--r--lib/Support/SmallVector.cpp2
-rw-r--r--lib/Support/Statistic.cpp14
-rw-r--r--lib/Support/StringRef.cpp27
-rw-r--r--lib/Support/SystemUtils.cpp10
-rw-r--r--lib/Support/Triple.cpp243
-rw-r--r--lib/Support/raw_ostream.cpp97
-rw-r--r--lib/System/DynamicLibrary.cpp6
-rw-r--r--lib/System/Path.cpp40
-rw-r--r--lib/System/RWMutex.cpp18
-rw-r--r--lib/System/ThreadLocal.cpp5
-rw-r--r--lib/System/Unix/Path.inc12
-rw-r--r--lib/System/Unix/Signals.inc44
-rw-r--r--lib/System/Unix/ThreadLocal.inc1
-rw-r--r--lib/System/Win32/Path.inc16
-rw-r--r--lib/System/Win32/Signals.inc14
-rw-r--r--lib/System/Win32/ThreadLocal.inc4
-rw-r--r--lib/Target/ARM/ARM.h60
-rw-r--r--lib/Target/ARM/ARM.td76
-rw-r--r--lib/Target/ARM/ARMAddressingModes.h20
-rw-r--r--lib/Target/ARM/ARMAsmPrinter.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp)171
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.cpp134
-rw-r--r--lib/Target/ARM/ARMBaseInstrInfo.h81
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp563
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h29
-rw-r--r--lib/Target/ARM/ARMCallingConv.td4
-rw-r--r--lib/Target/ARM/ARMCodeEmitter.cpp72
-rw-r--r--lib/Target/ARM/ARMConstantIslandPass.cpp96
-rw-r--r--lib/Target/ARM/ARMExpandPseudoInsts.cpp359
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp665
-rw-r--r--lib/Target/ARM/ARMGlobalMerge.cpp212
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp371
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp635
-rw-r--r--lib/Target/ARM/ARMISelLowering.h42
-rw-r--r--lib/Target/ARM/ARMInstrFormats.td296
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td406
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td627
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td19
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td730
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td76
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp139
-rw-r--r--lib/Target/ARM/ARMMCInstLower.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp)0
-rw-r--r--lib/Target/ARM/ARMMCInstLower.h (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.h)0
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h11
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td160
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp5
-rw-r--r--lib/Target/ARM/ARMSubtarget.h22
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp14
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h1
-rw-r--r--lib/Target/ARM/AsmParser/ARMAsmParser.cpp245
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp66
-rw-r--r--lib/Target/ARM/AsmPrinter/ARMInstPrinter.h2
-rw-r--r--lib/Target/ARM/AsmPrinter/CMakeLists.txt2
-rw-r--r--lib/Target/ARM/CMakeLists.txt10
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassembler.cpp13
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp183
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassemblerCore.h66
-rw-r--r--lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h227
-rw-r--r--lib/Target/ARM/Makefile5
-rw-r--r--lib/Target/ARM/NEONMoveFix.cpp2
-rw-r--r--lib/Target/ARM/NEONPreAllocPass.cpp147
-rw-r--r--lib/Target/ARM/README.txt42
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.cpp279
-rw-r--r--lib/Target/ARM/Thumb1RegisterInfo.h22
-rw-r--r--lib/Target/ARM/Thumb2ITBlockPass.cpp68
-rw-r--r--lib/Target/ARM/Thumb2InstrInfo.cpp8
-rw-r--r--lib/Target/ARM/Thumb2SizeReduction.cpp14
-rw-r--r--lib/Target/Alpha/AlphaBranchSelector.cpp2
-rw-r--r--lib/Target/Alpha/AlphaCodeEmitter.cpp2
-rw-r--r--lib/Target/Alpha/AlphaISelDAGToDAG.cpp4
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.cpp26
-rw-r--r--lib/Target/Alpha/AlphaInstrInfo.h6
-rw-r--r--lib/Target/Alpha/AlphaLLRP.cpp2
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.cpp6
-rw-r--r--lib/Target/Alpha/AlphaRegisterInfo.h5
-rw-r--r--lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp2
-rw-r--r--lib/Target/Blackfin/BlackfinInstrInfo.cpp28
-rw-r--r--lib/Target/Blackfin/BlackfinInstrInfo.h4
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.cpp16
-rw-r--r--lib/Target/Blackfin/BlackfinRegisterInfo.h7
-rw-r--r--lib/Target/CBackend/CBackend.cpp12
-rw-r--r--lib/Target/CellSPU/SPUCallingConv.td24
-rw-r--r--lib/Target/CellSPU/SPUISelDAGToDAG.cpp44
-rw-r--r--lib/Target/CellSPU/SPUISelLowering.cpp105
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.cpp142
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.h6
-rw-r--r--lib/Target/CellSPU/SPUInstrInfo.td156
-rw-r--r--lib/Target/CellSPU/SPUOperands.td6
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.cpp11
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.h5
-rw-r--r--lib/Target/CellSPU/SPURegisterInfo.td2
-rw-r--r--lib/Target/CppBackend/CPPBackend.cpp17
-rw-r--r--lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp10
-rw-r--r--lib/Target/MBlaze/MBlaze.td2
-rw-r--r--lib/Target/MBlaze/MBlazeCallingConv.td2
-rw-r--r--lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFPU.td2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFSL.td2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrFormats.td2
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.cpp35
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.h6
-rw-r--r--lib/Target/MBlaze/MBlazeInstrInfo.td2
-rw-r--r--lib/Target/MBlaze/MBlazeIntrinsics.td12
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.cpp5
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.h5
-rw-r--r--lib/Target/MBlaze/MBlazeRegisterInfo.td2
-rw-r--r--lib/Target/MBlaze/MBlazeSchedule.td2
-rw-r--r--lib/Target/MSIL/CMakeLists.txt3
-rw-r--r--lib/Target/MSIL/MSILWriter.cpp1706
-rw-r--r--lib/Target/MSIL/MSILWriter.h258
-rw-r--r--lib/Target/MSIL/Makefile16
-rw-r--r--lib/Target/MSIL/README.TXT26
-rw-r--r--lib/Target/MSIL/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp26
-rw-r--r--lib/Target/MSIL/TargetInfo/Makefile15
-rw-r--r--lib/Target/MSP430/MSP430BranchSelector.cpp7
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.cpp23
-rw-r--r--lib/Target/MSP430/MSP430InstrInfo.h4
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.cpp12
-rw-r--r--lib/Target/MSP430/MSP430RegisterInfo.h7
-rw-r--r--lib/Target/Mangler.cpp3
-rw-r--r--lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp20
-rw-r--r--lib/Target/Mips/Mips.td2
-rw-r--r--lib/Target/Mips/MipsCallingConv.td2
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp2
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp11
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp8
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td2
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td2
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp47
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h6
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td9
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp6
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h5
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td2
-rw-r--r--lib/Target/Mips/MipsSchedule.td2
-rw-r--r--lib/Target/PIC16/CMakeLists.txt2
-rw-r--r--lib/Target/PIC16/PIC16.h7
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.cpp10
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.h5
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.cpp15
-rw-r--r--lib/Target/PIC16/PIC16InstrInfo.h4
-rw-r--r--lib/Target/PIC16/PIC16MemSelOpt.cpp2
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp2
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Cloner.h2
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp5
-rw-r--r--lib/Target/PIC16/PIC16Passes/PIC16Overlay.h2
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.cpp9
-rw-r--r--lib/Target/PIC16/PIC16RegisterInfo.h5
-rw-r--r--lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp14
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp7
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td2
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp4
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp39
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp82
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h6
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td19
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp22
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h5
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp4
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h4
-rw-r--r--lib/Target/README.txt16
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp2
-rw-r--r--lib/Target/Sparc/FPMover.cpp2
-rw-r--r--lib/Target/Sparc/Sparc.td2
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp8
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp40
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h6
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td12
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp6
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h5
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp25
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h3
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp9
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h7
-rw-r--r--lib/Target/TargetData.cpp62
-rw-r--r--lib/Target/TargetMachine.cpp20
-rw-r--r--lib/Target/TargetRegisterInfo.cpp14
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp274
-rw-r--r--lib/Target/X86/AsmPrinter/CMakeLists.txt3
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp5
-rw-r--r--lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h3
-rw-r--r--lib/Target/X86/AsmPrinter/X86InstComments.cpp232
-rw-r--r--lib/Target/X86/AsmPrinter/X86InstComments.h25
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp5
-rw-r--r--lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h4
-rw-r--r--lib/Target/X86/CMakeLists.txt10
-rw-r--r--lib/Target/X86/README-FPStack.txt4
-rw-r--r--lib/Target/X86/README-SSE.txt42
-rw-r--r--lib/Target/X86/README.txt104
-rw-r--r--lib/Target/X86/SSEDomainFix.cpp2
-rw-r--r--lib/Target/X86/X86.h5
-rw-r--r--lib/Target/X86/X86.td4
-rw-r--r--lib/Target/X86/X86AsmBackend.cpp45
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp (renamed from lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp)51
-rw-r--r--lib/Target/X86/X86AsmPrinter.h (renamed from lib/Target/X86/AsmPrinter/X86AsmPrinter.h)6
-rw-r--r--lib/Target/X86/X86CallingConv.td32
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp119
-rw-r--r--lib/Target/X86/X86FastISel.cpp29
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp473
-rw-r--r--lib/Target/X86/X86FloatingPointRegKill.cpp153
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp37
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp1207
-rw-r--r--lib/Target/X86/X86ISelLowering.h58
-rw-r--r--lib/Target/X86/X86Instr64bit.td156
-rw-r--r--lib/Target/X86/X86InstrFMA.td60
-rw-r--r--lib/Target/X86/X86InstrFPStack.td6
-rw-r--r--lib/Target/X86/X86InstrFormats.td33
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td80
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp692
-rw-r--r--lib/Target/X86/X86InstrInfo.h30
-rw-r--r--lib/Target/X86/X86InstrInfo.td145
-rw-r--r--lib/Target/X86/X86InstrMMX.td2
-rw-r--r--lib/Target/X86/X86InstrSSE.td1945
-rw-r--r--lib/Target/X86/X86MCAsmInfo.cpp3
-rw-r--r--lib/Target/X86/X86MCCodeEmitter.cpp57
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp (renamed from lib/Target/X86/AsmPrinter/X86MCInstLower.cpp)119
-rw-r--r--lib/Target/X86/X86MCInstLower.h (renamed from lib/Target/X86/AsmPrinter/X86MCInstLower.h)13
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp103
-rw-r--r--lib/Target/X86/X86RegisterInfo.h9
-rw-r--r--lib/Target/X86/X86RegisterInfo.td17
-rw-r--r--lib/Target/X86/X86ShuffleDecode.h155
-rw-r--r--lib/Target/X86/X86Subtarget.cpp10
-rw-r--r--lib/Target/X86/X86Subtarget.h8
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp44
-rw-r--r--lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp12
-rw-r--r--lib/Target/XCore/CMakeLists.txt2
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp21
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp29
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h6
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td12
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp14
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h5
-rw-r--r--lib/Transforms/Hello/Hello.cpp11
-rw-r--r--lib/Transforms/IPO/ArgumentPromotion.cpp13
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp34
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp26
-rw-r--r--lib/Transforms/IPO/DeadTypeElimination.cpp4
-rw-r--r--lib/Transforms/IPO/ExtractGV.cpp157
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp12
-rw-r--r--lib/Transforms/IPO/GlobalDCE.cpp5
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp12
-rw-r--r--lib/Transforms/IPO/IPConstantPropagation.cpp10
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp6
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp8
-rw-r--r--lib/Transforms/IPO/Inliner.cpp12
-rw-r--r--lib/Transforms/IPO/Internalize.cpp8
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp23
-rw-r--r--lib/Transforms/IPO/LowerSetJmp.cpp4
-rw-r--r--lib/Transforms/IPO/MergeFunctions.cpp658
-rw-r--r--lib/Transforms/IPO/PartialInlining.cpp10
-rw-r--r--lib/Transforms/IPO/PartialSpecialization.cpp46
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp6
-rw-r--r--lib/Transforms/IPO/StripDeadPrototypes.cpp6
-rw-r--r--lib/Transforms/IPO/StripSymbols.cpp36
-rw-r--r--lib/Transforms/IPO/StructRetPromotion.cpp26
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h2
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp46
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp35
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp287
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp2
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp22
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp28
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp307
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp4
-rw-r--r--lib/Transforms/Instrumentation/EdgeProfiling.cpp6
-rw-r--r--lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp8
-rw-r--r--lib/Transforms/Scalar/ABCD.cpp1112
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp4
-rw-r--r--lib/Transforms/Scalar/BasicBlockPlacement.cpp6
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt3
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp36
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp6
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp200
-rw-r--r--lib/Transforms/Scalar/DCE.cpp10
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp7
-rw-r--r--lib/Transforms/Scalar/GEPSplitter.cpp6
-rw-r--r--lib/Transforms/Scalar/GVN.cpp15
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp18
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp217
-rw-r--r--lib/Transforms/Scalar/LICM.cpp728
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopIndexSplit.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp34
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp182
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp21
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp30
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp161
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp21
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp5
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp8
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp41
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp49
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp10
-rw-r--r--lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp6
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp27
-rw-r--r--lib/Transforms/Scalar/Sink.cpp5
-rw-r--r--lib/Transforms/Scalar/TailDuplication.cpp4
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp68
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp25
-rw-r--r--lib/Transforms/Utils/BasicInliner.cpp4
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp10
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp21
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp86
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp30
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp11
-rw-r--r--lib/Transforms/Utils/InstructionNamer.cpp8
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp27
-rw-r--r--lib/Transforms/Utils/Local.cpp3
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp40
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp6
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp11
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp16
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp7
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp17
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp40
-rw-r--r--lib/Transforms/Utils/SSI.cpp432
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp56
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp6
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp84
-rw-r--r--lib/VMCore/AsmWriter.cpp167
-rw-r--r--lib/VMCore/AutoUpgrade.cpp217
-rw-r--r--lib/VMCore/CMakeLists.txt1
-rw-r--r--lib/VMCore/ConstantFold.cpp40
-rw-r--r--lib/VMCore/Constants.cpp121
-rw-r--r--lib/VMCore/ConstantsContext.h16
-rw-r--r--lib/VMCore/Core.cpp92
-rw-r--r--lib/VMCore/Dominators.cpp147
-rw-r--r--lib/VMCore/Globals.cpp9
-rw-r--r--lib/VMCore/InlineAsm.cpp2
-rw-r--r--lib/VMCore/Instruction.cpp4
-rw-r--r--lib/VMCore/Instructions.cpp49
-rw-r--r--lib/VMCore/LLVMContext.cpp13
-rw-r--r--lib/VMCore/LLVMContextImpl.cpp3
-rw-r--r--lib/VMCore/LLVMContextImpl.h5
-rw-r--r--lib/VMCore/Metadata.cpp170
-rw-r--r--lib/VMCore/Module.cpp23
-rw-r--r--lib/VMCore/Pass.cpp293
-rw-r--r--lib/VMCore/PassManager.cpp330
-rw-r--r--lib/VMCore/PassRegistry.cpp159
-rw-r--r--lib/VMCore/PrintModulePass.cpp18
-rw-r--r--lib/VMCore/Type.cpp265
-rw-r--r--lib/VMCore/TypesContext.h72
-rw-r--r--lib/VMCore/Use.cpp19
-rw-r--r--lib/VMCore/Value.cpp35
-rw-r--r--lib/VMCore/ValueSymbolTable.cpp2
-rw-r--r--lib/VMCore/Verifier.cpp125
-rw-r--r--runtime/libprofile/Makefile8
-rw-r--r--runtime/libprofile/libprofile.exports (renamed from runtime/libprofile/exported_symbols.lst)0
-rw-r--r--test/Analysis/BasicAA/args-rets-allocas-loads.ll220
-rw-r--r--test/Analysis/BasicAA/constant-over-index.ll5
-rw-r--r--test/Analysis/BasicAA/featuretest.ll50
-rw-r--r--test/Analysis/BasicAA/gep-alias.ll12
-rw-r--r--test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll26
-rw-r--r--test/Analysis/BasicAA/modref.ll11
-rw-r--r--test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll20
-rw-r--r--test/Analysis/RegionInfo/block_sort.ll42
-rw-r--r--test/Analysis/RegionInfo/cond_loop.ll33
-rw-r--r--test/Analysis/RegionInfo/condition_complicated.ll60
-rw-r--r--test/Analysis/RegionInfo/condition_complicated_2.ll44
-rw-r--r--test/Analysis/RegionInfo/condition_forward_edge.ll26
-rw-r--r--test/Analysis/RegionInfo/condition_same_exit.ll31
-rw-r--r--test/Analysis/RegionInfo/condition_simple.ll28
-rw-r--r--test/Analysis/RegionInfo/dg.exp (renamed from test/Transforms/ABCD/dg.exp)0
-rw-r--r--test/Analysis/RegionInfo/exit_in_condition.ll38
-rw-r--r--test/Analysis/RegionInfo/infinite_loop.ll20
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_2.ll36
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_3.ll52
-rw-r--r--test/Analysis/RegionInfo/infinite_loop_4.ll48
-rw-r--r--test/Analysis/RegionInfo/loop_with_condition.ll46
-rw-r--r--test/Analysis/RegionInfo/loops_1.ll40
-rw-r--r--test/Analysis/RegionInfo/loops_2.ll49
-rw-r--r--test/Analysis/RegionInfo/mix_1.ll69
-rw-r--r--test/Analysis/RegionInfo/multiple_exiting_edge.ll38
-rw-r--r--test/Analysis/RegionInfo/nested_loops.ll33
-rw-r--r--test/Analysis/RegionInfo/next.ll49
-rw-r--r--test/Analysis/RegionInfo/paper.ll55
-rw-r--r--test/Analysis/RegionInfo/two_loops_same_header.ll46
-rw-r--r--test/Analysis/ScalarEvolution/avoid-smax-1.ll2
-rw-r--r--test/Analysis/ScalarEvolution/max-trip-count.ll42
-rw-r--r--test/Archive/README.txt2
-rw-r--r--test/Assembler/2010-01-06-UnionType.ll3
-rw-r--r--test/Assembler/align-inst-alloca.ll6
-rw-r--r--test/Assembler/align-inst-load.ll6
-rw-r--r--test/Assembler/align-inst-store.ll6
-rw-r--r--test/Assembler/align-inst.ll10
-rw-r--r--test/Assembler/comment.ll20
-rw-r--r--test/Assembler/getelementptr.ll4
-rw-r--r--test/Bindings/Ocaml/analysis.ml2
-rw-r--r--test/Bindings/Ocaml/bitreader.ml2
-rw-r--r--test/Bindings/Ocaml/bitwriter.ml2
-rw-r--r--test/Bindings/Ocaml/executionengine.ml2
-rw-r--r--test/Bindings/Ocaml/scalar_opts.ml6
-rw-r--r--test/Bindings/Ocaml/target.ml7
-rw-r--r--test/Bindings/Ocaml/vmcore.ml42
-rw-r--r--test/Bitcode/AutoUpgradeGlobals.ll3
-rw-r--r--test/Bitcode/AutoUpgradeGlobals.ll.bcbin0 -> 312 bytes
-rw-r--r--test/Bitcode/neon-intrinsics.ll213
-rw-r--r--test/Bitcode/neon-intrinsics.ll.bcbin0 -> 5764 bytes
-rw-r--r--test/BugPoint/crash-narrowfunctiontest.ll4
-rw-r--r--test/BugPoint/metadata.ll35
-rw-r--r--test/BugPoint/remove_arguments_test.ll4
-rw-r--r--test/CMakeLists.txt17
-rw-r--r--test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll6
-rw-r--r--test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll2
-rw-r--r--test/CodeGen/ARM/2009-12-02-vtrn-undef.ll14
-rw-r--r--test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll22
-rw-r--r--test/CodeGen/ARM/2010-05-21-BuildVector.ll4
-rw-r--r--test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll4
-rwxr-xr-xtest/CodeGen/ARM/2010-06-21-nondarwin-tc.ll1
-rw-r--r--test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll4
-rw-r--r--test/CodeGen/ARM/2010-07-26-GlobalMerge.ll95
-rw-r--r--test/CodeGen/ARM/2010-08-04-EHCrash.ll65
-rw-r--r--test/CodeGen/ARM/arguments.ll42
-rw-r--r--test/CodeGen/ARM/bfi.ll40
-rw-r--r--test/CodeGen/ARM/call-tc.ll1
-rw-r--r--test/CodeGen/ARM/code-placement.ll29
-rw-r--r--test/CodeGen/ARM/div.ll10
-rw-r--r--test/CodeGen/ARM/fast-isel.ll39
-rw-r--r--test/CodeGen/ARM/fnmuls.ll12
-rw-r--r--test/CodeGen/ARM/fpcmp-opt.ll2
-rw-r--r--test/CodeGen/ARM/fpowi.ll2
-rw-r--r--test/CodeGen/ARM/long_shift.ll6
-rw-r--r--test/CodeGen/ARM/lsr-on-unrolled-loops.ll19
-rw-r--r--test/CodeGen/ARM/pack.ll32
-rw-r--r--test/CodeGen/ARM/reg_sequence.ll73
-rw-r--r--test/CodeGen/ARM/remat.ll2
-rw-r--r--test/CodeGen/ARM/select.ll25
-rw-r--r--test/CodeGen/ARM/spill-q.ll8
-rw-r--r--test/CodeGen/ARM/t2-imm.ll6
-rw-r--r--test/CodeGen/ARM/vaba.ll128
-rw-r--r--test/CodeGen/ARM/vabd.ll38
-rw-r--r--test/CodeGen/ARM/vadd.ll82
-rw-r--r--test/CodeGen/ARM/vext.ll20
-rw-r--r--test/CodeGen/ARM/vld1.ll53
-rw-r--r--test/CodeGen/ARM/vld2.ll36
-rw-r--r--test/CodeGen/ARM/vld3.ll36
-rw-r--r--test/CodeGen/ARM/vld4.ll36
-rw-r--r--test/CodeGen/ARM/vldlane.ll84
-rw-r--r--test/CodeGen/ARM/vmla.ll78
-rw-r--r--test/CodeGen/ARM/vmls.ll78
-rw-r--r--test/CodeGen/ARM/vmov.ll30
-rw-r--r--test/CodeGen/ARM/vmul.ll68
-rw-r--r--test/CodeGen/ARM/vrev.ll18
-rw-r--r--test/CodeGen/ARM/vst1.ll40
-rw-r--r--test/CodeGen/ARM/vst2.ll36
-rw-r--r--test/CodeGen/ARM/vst3.ll38
-rw-r--r--test/CodeGen/ARM/vst4.ll36
-rw-r--r--test/CodeGen/ARM/vstlane.ll84
-rw-r--r--test/CodeGen/ARM/vsub.ll82
-rw-r--r--test/CodeGen/ARM/vtrn.ll27
-rw-r--r--test/CodeGen/ARM/vuzp.ll27
-rw-r--r--test/CodeGen/ARM/vzip.ll27
-rw-r--r--test/CodeGen/Alpha/2010-08-01-mulreduce64.ll11
-rw-r--r--test/CodeGen/CellSPU/arg_ret.ll33
-rw-r--r--test/CodeGen/CellSPU/bigstack.ll6
-rw-r--r--test/CodeGen/CellSPU/call.ll26
-rw-r--r--test/CodeGen/CellSPU/call_indirect.ll2
-rw-r--r--test/CodeGen/CellSPU/shuffles.ll23
-rw-r--r--test/CodeGen/CellSPU/v2f32.ll75
-rw-r--r--test/CodeGen/CellSPU/v2i32.ll64
-rw-r--r--test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll6
-rw-r--r--test/CodeGen/Mips/2008-06-05-Carry.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-03-SRet.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-05-ByVal.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-06-fadd64.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-FPExtend.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-Float2Int.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-15-InternalConstant.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-15-SmallSection.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-16-SignExtInReg.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-22-Cstpool.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-23-fpcmp.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-29-icmp.ll2
-rw-r--r--test/CodeGen/Mips/2008-07-31-fcopysign.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-01-AsmInline.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-03-fabs64.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-04-Bitconvert.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-06-Alloca.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-07-CC.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-07-FPRound.ll2
-rw-r--r--test/CodeGen/Mips/2008-08-08-ctlz.ll2
-rw-r--r--test/CodeGen/Mips/2010-07-20-Select.ll21
-rw-r--r--test/CodeGen/Mips/2010-07-20-Switch.ll33
-rw-r--r--test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll8
-rw-r--r--test/CodeGen/PowerPC/empty-functions.ll12
-rw-r--r--test/CodeGen/PowerPC/vec_constants.ll18
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores.ll2
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores16.ll2
-rw-r--r--test/CodeGen/SystemZ/07-BrUnCond.ll2
-rw-r--r--test/CodeGen/SystemZ/09-DynamicAlloca.ll2
-rw-r--r--test/CodeGen/SystemZ/09-Globals.ll2
-rw-r--r--test/CodeGen/SystemZ/10-FuncsPic.ll2
-rw-r--r--test/CodeGen/SystemZ/10-GlobalsPic.ll2
-rw-r--r--test/CodeGen/SystemZ/11-BSwap.ll2
-rw-r--r--test/CodeGen/SystemZ/2009-06-02-Rotate.ll2
-rw-r--r--test/CodeGen/SystemZ/2009-07-04-Shl32.ll2
-rw-r--r--test/CodeGen/SystemZ/2009-07-05-Shifts.ll2
-rw-r--r--test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll2
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll2
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll2
-rw-r--r--test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll2
-rw-r--r--test/CodeGen/Thumb/2010-07-15-debugOrdering.ll147
-rw-r--r--test/CodeGen/Thumb/barrier.ll24
-rw-r--r--test/CodeGen/Thumb/dyn-stackalloc.ll2
-rw-r--r--test/CodeGen/Thumb/large-stack.ll29
-rw-r--r--test/CodeGen/Thumb/vargs.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll4
-rw-r--r--test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll13
-rw-r--r--test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll53
-rw-r--r--test/CodeGen/Thumb2/bfi.ll40
-rw-r--r--test/CodeGen/Thumb2/cortex-fp.ll24
-rw-r--r--test/CodeGen/Thumb2/crash.ll6
-rw-r--r--test/CodeGen/Thumb2/div.ll2
-rw-r--r--test/CodeGen/Thumb2/ldr-str-imm12.ll8
-rw-r--r--test/CodeGen/Thumb2/lsr-deficiency.ll4
-rw-r--r--test/CodeGen/Thumb2/machine-licm-vdup.ll38
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-and2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-badreg-operands.ll15
-rw-r--r--test/CodeGen/Thumb2/thumb2-barrier.ll17
-rw-r--r--test/CodeGen/Thumb2/thumb2-call-tc.ll1
-rw-r--r--test/CodeGen/Thumb2/thumb2-cmp.ll14
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll1
-rw-r--r--test/CodeGen/Thumb2/thumb2-pack.ll32
-rw-r--r--test/CodeGen/Thumb2/thumb2-spill-q.ll8
-rw-r--r--test/CodeGen/Thumb2/thumb2-uxtb.ll2
-rw-r--r--test/CodeGen/X86/2006-05-22-FPSetEQ.ll2
-rw-r--r--test/CodeGen/X86/2007-06-14-branchfold.ll133
-rw-r--r--test/CodeGen/X86/2008-01-25-EmptyFunction.ll8
-rw-r--r--test/CodeGen/X86/2008-08-06-CmpStride.ll (renamed from test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll)0
-rw-r--r--test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll (renamed from test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll)0
-rw-r--r--test/CodeGen/X86/2009-02-26-MachineLICMBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-13-PHIElimBug.ll2
-rw-r--r--test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll2
-rw-r--r--test/CodeGen/X86/2009-03-23-MultiUseSched.ll2
-rw-r--r--test/CodeGen/X86/2010-01-18-DbgValue.ll (renamed from test/DebugInfo/2010-01-18-DbgValue.ll)5
-rw-r--r--test/CodeGen/X86/2010-02-01-DbgValueCrash.ll (renamed from test/DebugInfo/2010-02-01-DbgValueCrash.ll)1
-rw-r--r--test/CodeGen/X86/2010-05-25-DotDebugLoc.ll (renamed from test/DebugInfo/2010-05-25-DotDebugLoc.ll)2
-rw-r--r--test/CodeGen/X86/2010-05-28-Crash.ll (renamed from test/DebugInfo/2010-05-28-Crash.ll)0
-rw-r--r--test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll (renamed from test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll)0
-rw-r--r--test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll28
-rw-r--r--test/CodeGen/X86/2010-07-15-Crash.ll12
-rw-r--r--test/CodeGen/X86/2010-07-29-SetccSimplify.ll14
-rw-r--r--test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll36
-rw-r--r--test/CodeGen/X86/2010-08-04-MingWCrash.ll39
-rw-r--r--test/CodeGen/X86/2010-08-10-DbgConstant.ll25
-rw-r--r--test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll28
-rw-r--r--test/CodeGen/X86/GC/dg.exp4
-rw-r--r--test/CodeGen/X86/MachineSink-PHIUse.ll39
-rw-r--r--test/CodeGen/X86/avx-128.ll12
-rw-r--r--test/CodeGen/X86/avx-256.ll15
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll2587
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86_64.ll50
-rw-r--r--test/CodeGen/X86/barrier-sse.ll21
-rw-r--r--test/CodeGen/X86/barrier.ll7
-rw-r--r--test/CodeGen/X86/call-imm.ll2
-rw-r--r--test/CodeGen/X86/change-compare-stride-trickiness-0.ll (renamed from test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll)0
-rw-r--r--test/CodeGen/X86/change-compare-stride-trickiness-1.ll (renamed from test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll)0
-rw-r--r--test/CodeGen/X86/change-compare-stride-trickiness-2.ll (renamed from test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll)0
-rw-r--r--test/CodeGen/X86/constant-pool-remat-0.ll4
-rw-r--r--test/CodeGen/X86/critical-edge-split.ll2
-rw-r--r--test/CodeGen/X86/dllexport.ll2
-rw-r--r--test/CodeGen/X86/dyn-stackalloc.ll6
-rw-r--r--test/CodeGen/X86/empty-functions.ll15
-rw-r--r--test/CodeGen/X86/fabs.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-atomic.ll17
-rw-r--r--test/CodeGen/X86/fast-isel-cmp-branch.ll29
-rw-r--r--test/CodeGen/X86/fast-isel-gep.ll4
-rw-r--r--test/CodeGen/X86/fast-isel-shift-imm.ll2
-rw-r--r--test/CodeGen/X86/force-align-stack.ll21
-rw-r--r--test/CodeGen/X86/insert-positions.ll (renamed from test/Transforms/LoopStrengthReduce/insert-positions.ll)0
-rw-r--r--test/CodeGen/X86/int-intrinsic.ll20
-rw-r--r--test/CodeGen/X86/licm-nested.ll2
-rw-r--r--test/CodeGen/X86/lock-inst-encoding.ll22
-rw-r--r--test/CodeGen/X86/loop-strength-reduce4.ll4
-rw-r--r--test/CodeGen/X86/lsr-interesting-step.ll51
-rw-r--r--test/CodeGen/X86/lsr-normalization.ll99
-rw-r--r--test/CodeGen/X86/lsr-reuse.ll4
-rw-r--r--test/CodeGen/X86/lsr-static-addr.ll31
-rw-r--r--test/CodeGen/X86/lsr-wrap.ll2
-rw-r--r--test/CodeGen/X86/narrow_op-2.ll25
-rw-r--r--test/CodeGen/X86/phi-immediate-factoring.ll2
-rw-r--r--test/CodeGen/X86/pr7882.ll17
-rw-r--r--test/CodeGen/X86/shl-anyext.ll40
-rw-r--r--test/CodeGen/X86/sibcall.ll2
-rw-r--r--test/CodeGen/X86/sse-minmax.ll4
-rw-r--r--test/CodeGen/X86/sse1.ll45
-rw-r--r--test/CodeGen/X86/sse2.ll168
-rw-r--r--test/CodeGen/X86/sse41.ll25
-rw-r--r--test/CodeGen/X86/stack-color-with-reg.ll361
-rw-r--r--test/CodeGen/X86/stdcall.ll2
-rw-r--r--test/CodeGen/X86/store-narrow.ll31
-rw-r--r--test/CodeGen/X86/tailcall-fastisel.ll14
-rw-r--r--test/CodeGen/X86/twoaddr-coalesce.ll2
-rw-r--r--test/CodeGen/X86/v2f32.ll57
-rw-r--r--test/CodeGen/X86/vec_cast.ll21
-rw-r--r--test/CodeGen/X86/vec_insert-6.ll2
-rw-r--r--test/CodeGen/X86/vec_insert-9.ll2
-rw-r--r--test/CodeGen/X86/vec_shift4.ll25
-rw-r--r--test/CodeGen/X86/vec_shuffle-10.ll25
-rw-r--r--test/CodeGen/X86/vec_shuffle-19.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-20.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-24.ll3
-rw-r--r--test/CodeGen/X86/vec_shuffle-3.ll20
-rw-r--r--test/CodeGen/X86/vec_shuffle-37.ll14
-rw-r--r--test/CodeGen/X86/vec_shuffle-4.ll12
-rw-r--r--test/CodeGen/X86/vec_shuffle-5.ll13
-rw-r--r--test/CodeGen/X86/vec_shuffle-6.ll42
-rw-r--r--test/CodeGen/X86/vec_shuffle-7.ll11
-rw-r--r--test/CodeGen/X86/vec_shuffle-8.ll10
-rw-r--r--test/CodeGen/X86/vec_shuffle-9.ll21
-rw-r--r--test/CodeGen/X86/widen_shuffle-1.ll8
-rw-r--r--test/CodeGen/X86/win_chkstk.ll45
-rw-r--r--test/CodeGen/X86/zero-remat.ll2
-rw-r--r--test/DebugInfo/2010-07-19-Crash.ll24
-rw-r--r--test/DebugInfo/2010-08-04-StackVariable.ll124
-rw-r--r--test/DebugInfo/printdbginfo2.ll2
-rw-r--r--test/Feature/NamedMDNode.ll2
-rw-r--r--test/Feature/linker_private_linkages.ll1
-rw-r--r--test/Feature/metadata.ll6
-rw-r--r--test/Feature/unions.ll14
-rw-r--r--test/FrontendC++/2009-07-15-LineNumbers.cpp2
-rw-r--r--test/FrontendC++/2010-07-19-nowarn.cpp21
-rw-r--r--test/FrontendC++/2010-07-23-DeclLoc.cpp86
-rw-r--r--test/FrontendC++/2010-08-31-ByValArg.cpp53
-rw-r--r--test/FrontendC/2008-03-24-BitField-And-Alloca.c2
-rw-r--r--test/FrontendC/2010-05-18-asmsched.c4
-rw-r--r--test/FrontendC/2010-07-14-overconservative-align.c4
-rw-r--r--test/FrontendC/2010-07-14-ref-off-end.c4
-rw-r--r--test/FrontendC/2010-07-27-MinNoFoldConst.c18
-rw-r--r--test/FrontendC/2010-08-12-asm-aggr-arg.c16
-rw-r--r--test/FrontendC/asm-reg-var-local.c32
-rw-r--r--test/FrontendC/cstring-align.c14
-rw-r--r--test/FrontendC/misaligned-param.c15
-rw-r--r--test/FrontendC/vla-1.c5
-rw-r--r--test/FrontendC/vla-2.c10
-rw-r--r--test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm27
-rw-r--r--test/FrontendObjC++/2010-08-04-Template.mm10
-rw-r--r--test/FrontendObjC++/2010-08-06-X.Y-syntax.mm16
-rw-r--r--test/FrontendObjC/2009-08-17-DebugInfo.m2
-rw-r--r--test/Integer/a15.ll27
-rw-r--r--test/Integer/a15.ll.out21
-rw-r--r--test/Integer/a17.ll25
-rw-r--r--test/Integer/a17.ll.out20
-rw-r--r--test/Integer/a31.ll25
-rw-r--r--test/Integer/a31.ll.out20
-rw-r--r--test/Integer/a33.ll26
-rw-r--r--test/Integer/a33.ll.out20
-rw-r--r--test/Integer/a63.ll25
-rw-r--r--test/Integer/a63.ll.out20
-rw-r--r--test/Integer/a7.ll31
-rw-r--r--test/Integer/a7.ll.out25
-rw-r--r--test/Integer/a9.ll25
-rw-r--r--test/Integer/a9.ll.out19
-rw-r--r--test/LLVMC/Alias.td24
-rw-r--r--test/LLVMC/AppendCmdHook.td4
-rw-r--r--test/LLVMC/EmptyCompilationGraph.td2
-rw-r--r--test/LLVMC/EnvParentheses.td4
-rw-r--r--test/LLVMC/ExternOptions.td26
-rw-r--r--test/LLVMC/ForwardAs.td6
-rw-r--r--test/LLVMC/ForwardTransformedValue.td12
-rw-r--r--test/LLVMC/ForwardValue.td12
-rw-r--r--test/LLVMC/HookWithArguments.td4
-rw-r--r--test/LLVMC/HookWithInFile.td4
-rw-r--r--test/LLVMC/Init.td4
-rw-r--r--test/LLVMC/LanguageMap.td29
-rw-r--r--test/LLVMC/MultiValuedOption.td6
-rw-r--r--test/LLVMC/MultipleCompilationGraphs.td2
-rw-r--r--test/LLVMC/MultiplePluginPriorities.td17
-rw-r--r--test/LLVMC/NoActions.td4
-rw-r--r--test/LLVMC/NoCompilationGraph.td2
-rw-r--r--test/LLVMC/OneOrMore.td4
-rw-r--r--test/LLVMC/OptionPreprocessor.td4
-rw-r--r--test/LLVMC/OutputSuffixHook.td8
-rw-r--r--test/LLVMC/TestWarnings.td2
-rw-r--r--test/Linker/metadata-a.ll15
-rw-r--r--test/Linker/metadata-b.ll9
-rw-r--r--test/MC/AsmParser/ARM/arm_instructions.s8
-rw-r--r--test/MC/AsmParser/ELF/dg.exp6
-rw-r--r--test/MC/AsmParser/ELF/directive_previous.s13
-rw-r--r--test/MC/AsmParser/ELF/directive_section.s23
-rw-r--r--test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s42
-rw-r--r--test/MC/AsmParser/X86/x86_32-avx-encoding.s3241
-rw-r--r--test/MC/AsmParser/X86/x86_32-encoding.s2889
-rw-r--r--test/MC/AsmParser/X86/x86_32-fma3-encoding.s674
-rw-r--r--test/MC/AsmParser/X86/x86_32-new-encoder.s8
-rw-r--r--test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s42
-rw-r--r--test/MC/AsmParser/X86/x86_64-avx-encoding.s3318
-rw-r--r--test/MC/AsmParser/X86/x86_64-encoding.s2892
-rw-r--r--test/MC/AsmParser/X86/x86_64-fma3-encoding.s674
-rw-r--r--test/MC/AsmParser/X86/x86_64-new-encoder.s13
-rw-r--r--test/MC/AsmParser/X86/x86_instruction_errors.s5
-rw-r--r--test/MC/AsmParser/X86/x86_instructions.s26
-rw-r--r--test/MC/AsmParser/X86/x86_operands.s2
-rw-r--r--test/MC/AsmParser/dg.exp5
-rw-r--r--test/MC/AsmParser/directive_abort.s6
-rw-r--r--test/MC/AsmParser/directive_elf_size.s8
-rw-r--r--test/MC/AsmParser/directive_values.s17
-rw-r--r--test/MC/AsmParser/dollars-in-identifiers.s7
-rw-r--r--test/MC/AsmParser/macro-def-in-instantiation.s13
-rw-r--r--test/MC/AsmParser/macros-parsing.s23
-rw-r--r--test/MC/AsmParser/macros.s39
-rw-r--r--test/MC/COFF/basic-coff.ll136
-rw-r--r--test/MC/COFF/dg.exp5
-rw-r--r--test/MC/COFF/switch-relocations.ll34
-rw-r--r--test/MC/COFF/symbol-fragment-offset.ll182
-rw-r--r--test/MC/Disassembler/arm-tests.txt34
-rw-r--r--test/MC/Disassembler/neon-tests.txt3
-rw-r--r--test/MC/Disassembler/thumb-tests.txt12
-rw-r--r--test/MC/ELF/bss.ll8
-rw-r--r--test/MC/ELF/dg.exp5
-rw-r--r--test/Makefile39
-rw-r--r--test/Other/close-stderr.ll9
-rw-r--r--test/Other/constant-fold-gep.ll42
-rw-r--r--test/Other/inline-asm-newline-terminator.ll1
-rw-r--r--test/Other/lint.ll4
-rwxr-xr-xtest/Scripts/coff-dump.py566
-rw-r--r--test/Scripts/coff-dump.py.bat4
-rw-r--r--test/TableGen/FieldAccess.td14
-rw-r--r--test/TableGen/ListManip.td10
-rwxr-xr-xtest/TestRunner.sh37
-rw-r--r--test/Transforms/ABCD/basic.ll27
-rw-r--r--test/Transforms/ConstProp/constant-expr.ll4
-rw-r--r--test/Transforms/ConstantMerge/dont-merge.ll14
-rw-r--r--test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll25
-rw-r--r--test/Transforms/CorrelatedValuePropagation/basic.ll83
-rw-r--r--test/Transforms/CorrelatedValuePropagation/dg.exp3
-rw-r--r--test/Transforms/GVN/2009-07-13-MemDepSortFail.ll2
-rw-r--r--test/Transforms/GlobalOpt/2008-07-17-addrspace.ll2
-rw-r--r--test/Transforms/GlobalOpt/crash.ll15
-rw-r--r--test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll4
-rw-r--r--test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll25
-rw-r--r--test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll2
-rw-r--r--test/Transforms/IndVarSimplify/crash.ll36
-rw-r--r--test/Transforms/IndVarSimplify/loop_evaluate10.ll2
-rw-r--r--test/Transforms/IndVarSimplify/uglygep.ll40
-rw-r--r--test/Transforms/InstCombine/align-addr.ll33
-rw-r--r--test/Transforms/InstCombine/align-inc.ll12
-rw-r--r--test/Transforms/InstCombine/bit-checks.ll11
-rw-r--r--test/Transforms/InstCombine/bitcast-scalar-to-vector.ll14
-rw-r--r--test/Transforms/InstCombine/bitcast.ll105
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll34
-rw-r--r--test/Transforms/InstCombine/phi.ll21
-rw-r--r--test/Transforms/InstCombine/shift-simplify.ll42
-rw-r--r--test/Transforms/InstCombine/shift-trunc-shift.ll10
-rw-r--r--test/Transforms/InstCombine/shift.ll102
-rw-r--r--test/Transforms/InstCombine/sqrt.ll32
-rw-r--r--test/Transforms/InstCombine/trunc-mask-ext.ll38
-rw-r--r--test/Transforms/InstCombine/trunc.ll99
-rw-r--r--test/Transforms/InstCombine/urem-simplify-bug.ll2
-rw-r--r--test/Transforms/JumpThreading/2010-08-26-and.ll162
-rw-r--r--test/Transforms/JumpThreading/basic.ll68
-rw-r--r--test/Transforms/JumpThreading/crash.ll50
-rw-r--r--test/Transforms/JumpThreading/lvi-load.ll49
-rw-r--r--test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll4
-rw-r--r--test/Transforms/LICM/crash.ll61
-rw-r--r--test/Transforms/LICM/hoisting.ll16
-rw-r--r--test/Transforms/LICM/scalar_promote.ll59
-rw-r--r--test/Transforms/LICM/sinking.ll14
-rw-r--r--test/Transforms/LoopRotate/phi-duplicate.ll22
-rw-r--r--test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll20
-rw-r--r--test/Transforms/LoopSimplify/indirectbr-backedge.ll35
-rw-r--r--test/Transforms/LoopSimplify/preserve-scev.ll50
-rw-r--r--test/Transforms/LoopStrengthReduce/pr3571.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/uglygep.ll1
-rw-r--r--test/Transforms/LoopUnswitch/infinite-loop.ll53
-rw-r--r--test/Transforms/LowerAtomic/atomic-load.ll40
-rw-r--r--test/Transforms/LowerAtomic/atomic-swap.ll26
-rw-r--r--test/Transforms/LowerAtomic/barrier.ll10
-rw-r--r--test/Transforms/LowerAtomic/dg.exp (renamed from test/Transforms/SSI/dg.exp)0
-rw-r--r--test/Transforms/MergeFunc/vectors-and-arrays.ll18
-rw-r--r--test/Transforms/PartialSpecialize/two-specializations.ll22
-rw-r--r--test/Transforms/SCCP/ipsccp-addr-taken.ll28
-rw-r--r--test/Transforms/SSI/2009-07-09-Invoke.ll71
-rw-r--r--test/Transforms/SSI/2009-08-15-UnreachableBB.ll19
-rw-r--r--test/Transforms/SSI/2009-08-17-CritEdge.ll15
-rw-r--r--test/Transforms/SSI/2009-08-19-UnreachableBB2.ll22
-rw-r--r--test/Transforms/SSI/ssiphi.ll22
-rw-r--r--test/Transforms/ScalarRepl/vector_promote.ll43
-rw-r--r--test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll2
-rw-r--r--test/Transforms/SimplifyCFG/basictest.ll1
-rw-r--r--test/Transforms/SimplifyCFG/indirectbr.ll64
-rw-r--r--test/Transforms/StripSymbols/2010-08-25-crash.ll19
-rw-r--r--test/Transforms/TailCallElim/accum_recursion.ll65
-rw-r--r--test/Transforms/TailCallElim/accum_recursion_constant_arg.ll20
-rw-r--r--test/Transforms/TailCallElim/switch.ll34
-rw-r--r--test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll2
-rw-r--r--test/Verifier/2010-08-07-PointerIntrinsic.ll21
-rw-r--r--test/lit.cfg21
-rw-r--r--tools/CMakeLists.txt2
-rw-r--r--tools/Makefile20
-rw-r--r--tools/bugpoint-passes/CMakeLists.txt3
-rw-r--r--tools/bugpoint-passes/Makefile23
-rw-r--r--tools/bugpoint-passes/TestPasses.cpp (renamed from tools/bugpoint/TestPasses.cpp)16
-rw-r--r--tools/bugpoint-passes/bugpoint.exports0
-rw-r--r--tools/bugpoint/BugDriver.cpp29
-rw-r--r--tools/bugpoint/BugDriver.h67
-rw-r--r--tools/bugpoint/CMakeLists.txt1
-rw-r--r--tools/bugpoint/CrashDebugger.cpp73
-rw-r--r--tools/bugpoint/ExecutionDriver.cpp24
-rw-r--r--tools/bugpoint/ExtractFunction.cpp93
-rw-r--r--tools/bugpoint/FindBugs.cpp8
-rw-r--r--tools/bugpoint/Miscompilation.cpp157
-rw-r--r--tools/bugpoint/OptimizerDriver.cpp113
-rw-r--r--tools/bugpoint/ToolRunner.cpp12
-rw-r--r--tools/bugpoint/bugpoint.cpp26
-rw-r--r--tools/edis/CMakeLists.txt14
-rw-r--r--tools/edis/EDInfo.td1
-rw-r--r--tools/edis/EDMain.cpp158
-rw-r--r--tools/edis/Makefile30
-rw-r--r--tools/gold/README.txt4
-rw-r--r--tools/gold/gold-plugin.cpp40
-rw-r--r--tools/llc/llc.cpp153
-rw-r--r--tools/lli/lli.cpp3
-rw-r--r--tools/llvm-as/llvm-as.cpp18
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp17
-rw-r--r--tools/llvm-config/CMakeLists.txt2
-rw-r--r--tools/llvm-diff/CMakeLists.txt6
-rw-r--r--tools/llvm-diff/DifferenceEngine.cpp676
-rw-r--r--tools/llvm-diff/DifferenceEngine.h179
-rw-r--r--tools/llvm-diff/Makefile (renamed from tools/llvmc/example/Skeleton/plugins/Plugin/Makefile)16
-rw-r--r--tools/llvm-diff/llvm-diff.cpp331
-rw-r--r--tools/llvm-dis/llvm-dis.cpp47
-rw-r--r--tools/llvm-extract/llvm-extract.cpp38
-rw-r--r--tools/llvm-ld/llvm-ld.cpp38
-rw-r--r--tools/llvm-link/llvm-link.cpp19
-rw-r--r--tools/llvm-mc/CMakeLists.txt3
-rw-r--r--tools/llvm-mc/Disassembler.cpp182
-rw-r--r--tools/llvm-mc/Disassembler.h7
-rw-r--r--tools/llvm-mc/Makefile4
-rw-r--r--tools/llvm-mc/llvm-mc.cpp177
-rw-r--r--tools/llvm-nm/llvm-nm.cpp15
-rw-r--r--tools/llvm-prof/llvm-prof.cpp15
-rw-r--r--tools/llvm-shlib/Makefile53
-rw-r--r--tools/llvmc/CMakeLists.txt2
-rw-r--r--tools/llvmc/Makefile8
-rw-r--r--tools/llvmc/doc/LLVMC-Reference.rst9
-rw-r--r--tools/llvmc/example/Hello/Hello.cpp33
-rw-r--r--tools/llvmc/example/Simple/PluginMain.cpp1
-rw-r--r--tools/llvmc/example/Skeleton/Makefile24
-rw-r--r--tools/llvmc/example/Skeleton/driver/Makefile13
-rw-r--r--tools/llvmc/example/Skeleton/plugins/Makefile18
-rw-r--r--tools/llvmc/example/Skeleton/plugins/Plugin/PluginMain.cpp1
-rw-r--r--tools/llvmc/example/mcc16/Makefile18
-rw-r--r--tools/llvmc/example/mcc16/driver/Makefile13
-rw-r--r--tools/llvmc/example/mcc16/plugins/Makefile18
-rw-r--r--tools/llvmc/examples/Hello/Hello.cpp29
-rw-r--r--tools/llvmc/examples/Hello/Makefile (renamed from tools/llvmc/example/Hello/Makefile)4
-rw-r--r--tools/llvmc/examples/Makefile (renamed from tools/llvmc/example/Simple/Makefile)7
-rw-r--r--tools/llvmc/examples/Simple/Makefile (renamed from tools/llvmc/plugins/Clang/Makefile)6
-rw-r--r--tools/llvmc/examples/Simple/Simple.cpp2
-rw-r--r--tools/llvmc/examples/Simple/Simple.td (renamed from tools/llvmc/example/Simple/Simple.td)24
-rw-r--r--tools/llvmc/examples/Skeleton/AutoGenerated.td (renamed from tools/llvmc/example/Skeleton/plugins/Plugin/Plugin.td)4
-rw-r--r--tools/llvmc/examples/Skeleton/Hooks.cpp12
-rw-r--r--tools/llvmc/examples/Skeleton/Main.cpp (renamed from tools/llvmc/driver/Main.cpp)3
-rw-r--r--tools/llvmc/examples/Skeleton/Makefile20
-rw-r--r--tools/llvmc/examples/Skeleton/README (renamed from tools/llvmc/example/Skeleton/README)2
-rw-r--r--tools/llvmc/examples/mcc16/Hooks.cpp (renamed from tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp)35
-rw-r--r--tools/llvmc/examples/mcc16/Main.cpp (renamed from tools/llvmc/example/mcc16/driver/Main.cpp)17
-rw-r--r--tools/llvmc/examples/mcc16/Makefile (renamed from tools/llvmc/plugins/Base/Makefile)6
-rw-r--r--tools/llvmc/examples/mcc16/PIC16.td (renamed from tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td)50
-rw-r--r--tools/llvmc/examples/mcc16/README (renamed from tools/llvmc/example/mcc16/README)2
-rw-r--r--tools/llvmc/plugins/Base/PluginMain.cpp1
-rw-r--r--tools/llvmc/plugins/Clang/PluginMain.cpp1
-rw-r--r--tools/llvmc/plugins/Makefile18
-rw-r--r--tools/llvmc/src/AutoGenerated.td17
-rw-r--r--tools/llvmc/src/Base.td.in (renamed from tools/llvmc/plugins/Base/Base.td.in)109
-rw-r--r--tools/llvmc/src/Clang.td (renamed from tools/llvmc/plugins/Clang/Clang.td)68
-rw-r--r--tools/llvmc/src/Hooks.cpp (renamed from tools/llvmc/plugins/Base/Hooks.cpp)0
-rw-r--r--tools/llvmc/src/Main.cpp (renamed from tools/llvmc/example/Skeleton/driver/Main.cpp)4
-rw-r--r--tools/llvmc/src/Makefile (renamed from tools/llvmc/driver/Makefile)5
-rw-r--r--tools/lto/LTOCodeGenerator.cpp50
-rw-r--r--tools/lto/LTOCodeGenerator.h6
-rw-r--r--tools/lto/LTOModule.cpp805
-rw-r--r--tools/lto/LTOModule.h1
-rw-r--r--tools/lto/Makefile13
-rw-r--r--tools/lto/lto.cpp28
-rw-r--r--tools/lto/lto.exports3
-rw-r--r--tools/opt/AnalysisWrappers.cpp22
-rw-r--r--tools/opt/GraphPrinters.cpp40
-rw-r--r--tools/opt/PrintSCC.cpp38
-rw-r--r--tools/opt/opt.cpp139
-rw-r--r--unittests/ADT/SmallVectorTest.cpp4
-rw-r--r--unittests/ADT/StringRefTest.cpp9
-rw-r--r--unittests/ADT/TripleTest.cpp117
-rw-r--r--unittests/Analysis/Makefile (renamed from tools/llvmc/example/mcc16/plugins/PIC16Base/Makefile)14
-rw-r--r--unittests/Analysis/ScalarEvolutionTest.cpp82
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp27
-rw-r--r--unittests/Makefile2
-rw-r--r--unittests/Makefile.unittest4
-rw-r--r--unittests/Support/Casting.cpp154
-rw-r--r--unittests/Support/ConstantRangeTest.cpp69
-rw-r--r--unittests/Support/ValueHandleTest.cpp1
-rw-r--r--unittests/VMCore/DerivedTypesTest.cpp57
-rw-r--r--unittests/VMCore/InstructionsTest.cpp4
-rw-r--r--unittests/VMCore/MetadataTest.cpp7
-rw-r--r--unittests/VMCore/PassManagerTest.cpp12
-rw-r--r--utils/FileCheck/FileCheck.cpp160
-rw-r--r--utils/FileUpdate/FileUpdate.cpp24
-rw-r--r--utils/Makefile7
-rwxr-xr-xutils/RegressionFinder.pl186
-rw-r--r--utils/TableGen/ARMDecoderEmitter.cpp61
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp164
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp2
-rw-r--r--utils/TableGen/CallingConvEmitter.cpp2
-rw-r--r--utils/TableGen/ClangAttrEmitter.cpp582
-rw-r--r--utils/TableGen/ClangAttrEmitter.h39
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp4
-rw-r--r--utils/TableGen/CodeGenInstruction.cpp1
-rw-r--r--utils/TableGen/CodeGenInstruction.h1
-rw-r--r--utils/TableGen/CodeGenIntrinsics.h2
-rw-r--r--utils/TableGen/CodeGenRegisters.h32
-rw-r--r--utils/TableGen/CodeGenTarget.cpp10
-rw-r--r--utils/TableGen/DAGISelEmitter.cpp48
-rw-r--r--utils/TableGen/DAGISelEmitter.h2
-rw-r--r--utils/TableGen/DAGISelMatcherEmitter.cpp2
-rw-r--r--utils/TableGen/DAGISelMatcherGen.cpp7
-rw-r--r--utils/TableGen/EDEmitter.cpp59
-rw-r--r--utils/TableGen/EDEmitter.h3
-rw-r--r--utils/TableGen/FastISelEmitter.cpp24
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp1
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp4
-rw-r--r--utils/TableGen/LLVMCConfigurationEmitter.cpp646
-rw-r--r--utils/TableGen/NeonEmitter.cpp11
-rw-r--r--utils/TableGen/Record.cpp28
-rw-r--r--utils/TableGen/Record.h12
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp30
-rw-r--r--utils/TableGen/TableGen.cpp98
-rw-r--r--utils/buildit/GNUmakefile16
-rwxr-xr-xutils/buildit/build_llvm33
-rw-r--r--utils/lit/lit/ExampleTests/lit.cfg3
-rw-r--r--utils/lit/lit/ExampleTests/required-and-missing.c4
-rw-r--r--utils/lit/lit/ExampleTests/required-and-present.c2
-rw-r--r--utils/lit/lit/TestFormats.py7
-rw-r--r--utils/lit/lit/TestRunner.py86
-rw-r--r--utils/lit/lit/TestingConfig.py9
-rwxr-xr-xutils/lit/lit/lit.py5
-rw-r--r--utils/llvm-lit/Makefile21
-rw-r--r--utils/llvm-lit/llvm-lit.in21
-rw-r--r--utils/llvm.grm4
-rwxr-xr-xutils/llvmdo4
-rwxr-xr-xutils/mkpatch37
-rwxr-xr-xutils/userloc.pl216
-rw-r--r--utils/valgrind/i386-pc-linux-gnu.supp34
-rw-r--r--utils/valgrind/x86_64-pc-linux-gnu.supp43
-rw-r--r--utils/vim/llvm.vim10
-rw-r--r--utils/vim/vimrc129
1399 files changed, 60754 insertions, 33441 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4f2221c9fdd..a6099d17c56a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,10 +1,20 @@
# See docs/CMake.html for instructions about how to build LLVM with CMake.
project(LLVM)
-cmake_minimum_required(VERSION 2.6.1)
+cmake_minimum_required(VERSION 2.8)
+
+# Add path for custom modules
+set(CMAKE_MODULE_PATH
+ ${CMAKE_MODULE_PATH}
+ "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
+ "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
+ )
+
+set(PACKAGE_VERSION "2.8")
+include(VersionFromVCS)
+add_version_info_from_vcs(PACKAGE_VERSION)
set(PACKAGE_NAME llvm)
-set(PACKAGE_VERSION 2.8svn)
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
set(PACKAGE_BUGREPORT "llvmbugs@cs.uiuc.edu")
@@ -53,7 +63,6 @@ set(LLVM_ALL_TARGETS
CppBackend
Mips
MBlaze
- MSIL
MSP430
PIC16
PowerPC
@@ -124,13 +133,6 @@ configure_file(
set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
-# Add path for custom modules
-set(CMAKE_MODULE_PATH
- ${CMAKE_MODULE_PATH}
- "${LLVM_MAIN_SRC_DIR}/cmake"
- "${LLVM_MAIN_SRC_DIR}/cmake/modules"
- )
-
include(AddLLVMDefinitions)
if(WIN32)
@@ -214,14 +216,8 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
if( MSVC )
- # List of valid CRTs for MSVC
- set(MSVC_CRT
- MD
- MDd
- MT
- MTd)
-
- set(LLVM_USE_CRT "" CACHE STRING "Specify VC++ CRT to use for debug/release configurations.")
+ include(ChooseMSVCCRT)
+
add_llvm_definitions( -D_CRT_SECURE_NO_DEPRECATE -D_CRT_SECURE_NO_WARNINGS )
add_llvm_definitions( -D_SCL_SECURE_NO_WARNINGS -DCRT_NONSTDC_NO_WARNINGS )
add_llvm_definitions( -D_SCL_SECURE_NO_DEPRECATE )
@@ -231,15 +227,6 @@ if( MSVC )
# Suppress 'new behavior: elements of array 'array' will be default initialized'
add_llvm_definitions( -wd4351 )
- if (NOT ${LLVM_USE_CRT} STREQUAL "")
- list(FIND MSVC_CRT ${LLVM_USE_CRT} idx)
- if (idx LESS 0)
- message(FATAL_ERROR "Invalid value for LLVM_USE_CRT: ${LLVM_USE_CRT}. Valid options are one of: ${MSVC_CRT}")
- endif (idx LESS 0)
- add_llvm_definitions("/${LLVM_USE_CRT}")
- message(STATUS "Using VC++ CRT: ${LLVM_USE_CRT}")
- endif (NOT ${LLVM_USE_CRT} STREQUAL "")
-
# Enable warnings
if (LLVM_ENABLE_WARNINGS)
add_llvm_definitions( /W4 /Wall )
@@ -308,6 +295,7 @@ add_subdirectory(lib/Analysis)
add_subdirectory(lib/Analysis/IPA)
add_subdirectory(lib/MC)
add_subdirectory(lib/MC/MCParser)
+add_subdirectory(lib/MC/MCDisassembler)
add_subdirectory(test)
add_subdirectory(utils/FileCheck)
@@ -372,6 +360,8 @@ add_subdirectory(tools)
option(LLVM_BUILD_EXAMPLES "Build LLVM example programs." OFF)
add_subdirectory(examples)
+add_subdirectory(cmake/modules)
+
install(DIRECTORY include/
DESTINATION include
FILES_MATCHING
diff --git a/CREDITS.TXT b/CREDITS.TXT
index e58b85fdbd9c..aeecfe2e21e1 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -134,6 +134,11 @@ N: Gabor Greif
E: ggreif@gmail.com
D: Improvements for space efficiency
+N: James Grosbach
+E: grosbach@apple.com
+D: SjLj exception handling support
+D: General fixes and improvements for the ARM back-end
+
N: Lang Hames
E: lhames@gmail.com
D: PBQP-based register allocator
@@ -247,6 +252,12 @@ N: Scott Michel
E: scottm@aero.org
D: Added STI Cell SPU backend.
+N: Takumi Nakamura
+E: geek4civic@gmail.com
+E: chapuni@hf.rim.or.jp
+D: Cygwin and MinGW support.
+S: Yokohama, Japan
+
N: Edward O'Callaghan
E: eocallaghan@auroraux.org
W: http://www.auroraux.org
@@ -277,6 +288,11 @@ N: Sandeep Patel
E: deeppatel1987@gmail.com
D: ARM calling conventions rewrite, hard float support
+N: Wesley Peck
+E: peckw@wesleypeck.com
+W: http://wesleypeck.com/
+D: MicroBlaze backend
+
N: Vladimir Prus
W: http://vladimir_prus.blogspot.com
E: ghost@cs.msu.su
@@ -288,7 +304,10 @@ D: MSIL backend
N: Duncan Sands
E: baldrick@free.fr
-D: Ada front-end, exception handling improvements
+D: Ada support in llvm-gcc
+D: Dragonegg plugin
+D: Exception handling improvements
+D: Type legalizer rewrite
N: Ruchira Sasanka
E: sasanka@uiuc.edu
@@ -306,6 +325,10 @@ N: Anand Shukla
E: ashukla@cs.uiuc.edu
D: The `paths' pass
+N: Michael J. Spencer
+E: bigcheesegs@gmail.com
+D: Shepherding Windows COFF support into MC.
+
N: Reid Spencer
E: rspencer@reidspencer.com
W: http://reidspencer.com/
@@ -329,14 +352,9 @@ E: xerxes@zafena.se
D: Cmake dependency chain and various bug fixes
N: Bill Wendling
-E: isanbard@gmail.com
+E: wendling@apple.com
D: Bunches of stuff
N: Bob Wilson
E: bob.wilson@acm.org
D: Advanced SIMD (NEON) support in the ARM backend
-
-N: Wesley Peck
-E: peckw@wesleypeck.com
-W: http://wesleypeck.com/
-D: MicroBlaze backend
diff --git a/Makefile b/Makefile
index d42f887b6b18..ae650b7f2d93 100644
--- a/Makefile
+++ b/Makefile
@@ -64,7 +64,8 @@ endif
ifeq ($(MAKECMDGOALS),install-clang)
DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
- tools/clang/runtime tools/clang/docs
+ tools/clang/runtime tools/clang/docs \
+ tools/lto
OPTIONAL_DIRS :=
NO_INSTALL = 1
endif
@@ -78,7 +79,8 @@ ifeq ($(MAKECMDGOALS),install-clang-c)
endif
ifeq ($(MAKECMDGOALS),clang-only)
- DIRS := $(filter-out tools runtime docs unittests, $(DIRS)) tools/clang
+ DIRS := $(filter-out tools runtime docs unittests, $(DIRS)) \
+ tools/clang tools/lto
OPTIONAL_DIRS :=
endif
@@ -110,7 +112,8 @@ cross-compile-build-tools:
--host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE); \
cd .. ; \
fi; \
- ($(MAKE) -C BuildTools \
+ (unset SDKROOT; \
+ $(MAKE) -C BuildTools \
BUILD_DIRS_ONLY=1 \
UNIVERSAL= \
ENABLE_OPTIMIZED=$(ENABLE_OPTIMIZED) \
@@ -167,7 +170,7 @@ FilesToConfig := \
include/llvm/Config/AsmParsers.def \
include/llvm/Config/Disassemblers.def \
include/llvm/System/DataTypes.h \
- tools/llvmc/plugins/Base/Base.td
+ tools/llvmc/src/Base.td
FilesToConfigPATH := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig))
all-local:: $(FilesToConfigPATH)
@@ -192,9 +195,6 @@ endif
check-llvm2cpp:
$(Verb)$(MAKE) check TESTSUITE=Feature RUNLLVM2CPP=1
-check-one:
- $(Verb)$(MAKE) -C test check-one TESTONE=$(TESTONE)
-
srpm: $(LLVM_OBJ_ROOT)/llvm.spec
rpmbuild -bs $(LLVM_OBJ_ROOT)/llvm.spec
diff --git a/Makefile.config.in b/Makefile.config.in
index 1d54b317c3e1..5ebd80384fb0 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -39,14 +39,18 @@ ifndef PROJECT_NAME
PROJECT_NAME := $(LLVMPackageName)
endif
-PROJ_OBJ_DIR := $(shell $(PWD))
-PROJ_OBJ_ROOT := $(shell cd $(PROJ_OBJ_DIR)/$(LEVEL); $(PWD))
+# The macro below is expanded when 'realpath' is not built-in.
+# Built-in 'realpath' is available on GNU Make 3.81.
+realpath = $(shell cd $(1); $(PWD))
+
+PROJ_OBJ_DIR := $(call realpath, .)
+PROJ_OBJ_ROOT := $(call realpath, $(PROJ_OBJ_DIR)/$(LEVEL))
ifeq ($(PROJECT_NAME),llvm)
-LLVM_SRC_ROOT := $(shell cd @abs_top_srcdir@; $(PWD))
-LLVM_OBJ_ROOT := $(shell cd @abs_top_builddir@; $(PWD))
-PROJ_SRC_ROOT := $(shell cd $(LLVM_SRC_ROOT); $(PWD))
-PROJ_SRC_DIR := $(shell cd $(LLVM_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR)); $(PWD))
+LLVM_SRC_ROOT := $(call realpath, @abs_top_srcdir@)
+LLVM_OBJ_ROOT := $(call realpath, @abs_top_builddir@)
+PROJ_SRC_ROOT := $(LLVM_SRC_ROOT)
+PROJ_SRC_DIR := $(call realpath, $(LLVM_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR)))
prefix := @prefix@
PROJ_prefix := $(prefix)
PROJ_VERSION := $(LLVMVersion)
@@ -66,7 +70,7 @@ endif
ifndef LLVM_OBJ_ROOT
$(error Projects must define LLVM_OBJ_ROOT)
endif
-PROJ_SRC_DIR := $(shell cd $(PROJ_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR)); $(PWD))
+PROJ_SRC_DIR := $(call realpath, $(PROJ_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR)))
prefix := $(PROJ_INSTALL_ROOT)
PROJ_prefix := $(prefix)
ifndef PROJ_VERSION
diff --git a/Makefile.rules b/Makefile.rules
index 12582f6f91d2..2e18c66e2b7a 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -196,105 +196,15 @@ install-local:: all-local
install-bytecode:: install-bytecode-local
###############################################################################
-# LLVMC: Provide rules for compiling llvmc plugins
+# LLVMC: Provide rules for compiling llvmc-based driver
###############################################################################
-ifdef LLVMC_PLUGIN
-
-LIBRARYNAME := $(patsubst %,plugin_llvmc_%,$(LLVMC_PLUGIN))
-CPP.Flags += -DLLVMC_PLUGIN_NAME=$(LLVMC_PLUGIN)
-REQUIRES_EH := 1
-
-ifeq ($(ENABLE_LLVMC_DYNAMIC),1)
- LD.Flags += -lCompilerDriver
-endif
-
-# Build a dynamic library if the user runs `make` directly from the plugin
-# directory.
-ifndef LLVMC_BUILTIN_PLUGIN
- LOADABLE_MODULE = 1
-endif
-
-# TableGen stuff...
-ifneq ($(BUILT_SOURCES),)
- LLVMC_BUILD_AUTOGENERATED_INC=1
-endif
-
-endif # LLVMC_PLUGIN
-
ifdef LLVMC_BASED_DRIVER
TOOLNAME = $(LLVMC_BASED_DRIVER)
-REQUIRES_EH := 1
-
-ifeq ($(ENABLE_LLVMC_DYNAMIC),1)
- LD.Flags += -lCompilerDriver
-else
- LLVMLIBS = CompilerDriver.a
- LINK_COMPONENTS = support system
-endif
-
-# Preprocessor magic that generates references to static variables in built-in
-# plugins.
-ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-
-USEDLIBS += $(patsubst %,plugin_llvmc_%.a,$(LLVMC_BUILTIN_PLUGINS))
-
-LLVMC_BUILTIN_PLUGIN_1 = $(word 1, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_2 = $(word 2, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_3 = $(word 3, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_4 = $(word 4, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_5 = $(word 5, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_6 = $(word 6, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_7 = $(word 7, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_8 = $(word 8, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_9 = $(word 9, $(LLVMC_BUILTIN_PLUGINS))
-LLVMC_BUILTIN_PLUGIN_10 = $(word 10, $(LLVMC_BUILTIN_PLUGINS))
-
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_1),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_1=$(LLVMC_BUILTIN_PLUGIN_1)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_2),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_2=$(LLVMC_BUILTIN_PLUGIN_2)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_3),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_3=$(LLVMC_BUILTIN_PLUGIN_3)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_4),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_4=$(LLVMC_BUILTIN_PLUGIN_4)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_5),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_5)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_6),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_6)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_7),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_7)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_8),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_8)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_9),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_9)
-endif
-
-ifneq ($(LLVMC_BUILTIN_PLUGIN_10),)
-CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_10)
-endif
-
-
-endif
+LLVMLIBS = CompilerDriver.a
+LINK_COMPONENTS = support system
endif # LLVMC_BASED_DRIVER
@@ -501,6 +411,26 @@ LLVMToolDir := $(LLVM_OBJ_ROOT)/$(BuildMode)/bin
LLVMExmplDir:= $(LLVM_OBJ_ROOT)/$(BuildMode)/examples
#--------------------------------------------------------------------
+# Locations of shared libraries
+#--------------------------------------------------------------------
+
+SharedPrefix := lib
+SharedLibDir := $(LibDir)
+LLVMSharedLibDir := $(LLVMLibDir)
+
+# Win32.DLL prefers to be located on the "PATH" of binaries.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ SharedLibDir := $(ToolDir)
+ LLVMSharedLibDir := $(LLVMToolDir)
+
+ ifeq ($(HOST_OS),Cygwin)
+ SharedPrefix := cyg
+ else
+ SharedPrefix :=
+ endif
+endif
+
+#--------------------------------------------------------------------
# LLVM Capable Compiler
#--------------------------------------------------------------------
@@ -573,12 +503,7 @@ ifeq ($(HOST_OS),Darwin)
SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
endif
else
- ifeq ($(HOST_OS),Cygwin)
- SharedLinkOptions=-shared -nostdlib -Wl,--export-all-symbols \
- -Wl,--enable-auto-import -Wl,--enable-auto-image-base
- else
- SharedLinkOptions=-shared
- endif
+ SharedLinkOptions=-shared
endif
ifeq ($(TARGET_OS),Darwin)
@@ -588,11 +513,13 @@ ifeq ($(TARGET_OS),Darwin)
endif
ifdef SHARED_LIBRARY
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
ifneq ($(HOST_OS),Darwin)
LD.Flags += $(RPATH) -Wl,'$$ORIGIN'
else
ifneq ($(DARWIN_MAJVERS),4)
- LD.Flags += $(RPATH) -Wl,$(LibDir)
+ LD.Flags += $(RPATH) -Wl,$(SharedLibDir)
+endif
endif
endif
endif
@@ -621,8 +548,8 @@ ifndef KEEP_SYMBOLS
endif
# Adjust linker flags for building an executable
-ifneq ($(HOST_OS),Darwin)
-ifneq ($(DARWIN_MAJVERS),4)
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifneq ($(HOST_OS), Darwin)
ifdef TOOLNAME
LD.Flags += $(RPATH) -Wl,'$$ORIGIN/../lib'
ifdef EXAMPLE_TOOL
@@ -631,12 +558,12 @@ ifdef TOOLNAME
LD.Flags += $(RPATH) -Wl,$(ToolDir) $(RDYNAMIC)
endif
endif
-endif
else
ifneq ($(DARWIN_MAJVERS),4)
LD.Flags += $(RPATH) -Wl,@executable_path/../lib
endif
endif
+endif
#----------------------------------------------------------
@@ -963,6 +890,13 @@ LLVMUsedLibs := $(patsubst %.a.o, lib%.a, $(addsuffix .o, $(LLVMLIBS)))
LLVMLibsPaths := $(addprefix $(LLVMLibDir)/,$(LLVMUsedLibs))
endif
+# Win32.DLL may refer to other components.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ ifdef LOADABLE_MODULE
+ LINK_COMPONENTS := all
+ endif
+endif
+
ifndef IS_CLEANING_TARGET
ifdef LINK_COMPONENTS
@@ -975,12 +909,28 @@ $(LLVM_CONFIG):
$(ToolDir)/$(strip $(TOOLNAME))$(EXEEXT): $(LLVM_CONFIG)
ifeq ($(ENABLE_SHARED), 1)
+# We can take the "auto-import" feature to get rid of using dllimport.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+LLVMLibsOptions += -Wl,--enable-auto-import,--enable-runtime-pseudo-reloc \
+ -L $(SharedLibDir)
+endif
LLVMLibsOptions += -lLLVM-$(LLVMVersion)
-LLVMLibsPaths += $(LibDir)/libLLVM-$(LLVMVersion)$(SHLIBEXT)
+LLVMLibsPaths += $(SharedLibDir)/$(SharedPrefix)LLVM-$(LLVMVersion)$(SHLIBEXT)
else
-LLVMLibsOptions += $(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS))
-LLVMLibsPaths += $(LLVM_CONFIG) \
- $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS))
+
+ifndef NO_LLVM_CONFIG
+LLVMConfigLibs := $(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS) || echo Error)
+ifeq ($(LLVMConfigLibs),Error)
+$(error llvm-config --libs failed)
+endif
+LLVMLibsOptions += $(LLVMConfigLibs)
+LLVMConfigLibfiles := $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS) || echo Error)
+ifeq ($(LLVMConfigLibfiles),Error)
+$(error llvm-config --libfiles failed)
+endif
+LLVMLibsPaths += $(LLVM_CONFIG) $(LLVMConfigLibfiles)
+endif
+
endif
endif
endif
@@ -1011,12 +961,25 @@ $(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
clean-local::
-$(Verb) $(RM) -f $(NativeExportsFile)
else
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+# GNU ld Win32 accepts .DEF files that contain "DATA" entries.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE:.exports=.def))
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+ $(Echo) Generating $(notdir $@)
+ $(Verb) $(ECHO) "EXPORTS" > $@
+ $(Verb) $(CAT) $< >> $@
+clean-local::
+ -$(Verb) $(RM) -f $(NativeExportsFile)
+else
+# Default behavior: just use the exports file verbatim.
NativeExportsFile := $(EXPORTED_SYMBOL_FILE)
endif
endif
+endif
# Now add the linker command-line options to use the native export file.
+# Darwin
ifeq ($(HOST_OS),Darwin)
LLVMLibsOptions += -Wl,-exported_symbols_list,$(NativeExportsFile)
endif
@@ -1026,6 +989,12 @@ ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
LLVMLibsOptions += -Wl,--version-script,$(NativeExportsFile)
endif
+# Windows
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+# LLVMLibsOptions is invalidated at processing tools/llvm-shlib.
+SharedLinkOptions += $(NativeExportsFile)
+endif
+
endif
###############################################################################
@@ -1100,10 +1069,10 @@ ifdef LIBRARYNAME
LIBRARYNAME := $(strip $(LIBRARYNAME))
ifdef LOADABLE_MODULE
LibName.A := $(LibDir)/$(LIBRARYNAME).a
-LibName.SO := $(LibDir)/$(LIBRARYNAME)$(SHLIBEXT)
+LibName.SO := $(SharedLibDir)/$(LIBRARYNAME)$(SHLIBEXT)
else
LibName.A := $(LibDir)/lib$(LIBRARYNAME).a
-LibName.SO := $(LibDir)/lib$(LIBRARYNAME)$(SHLIBEXT)
+LibName.SO := $(SharedLibDir)/$(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
endif
LibName.O := $(LibDir)/$(LIBRARYNAME).o
LibName.BCA:= $(LibDir)/lib$(LIBRARYNAME).bca
@@ -1128,14 +1097,14 @@ SharedLibKindMessage := "Loadable Module"
else
SharedLibKindMessage := "Shared Library"
endif
-$(LibName.SO): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) $(LibDir)/.dir
+$(LibName.SO): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) $(SharedLibDir)/.dir
$(Echo) Linking $(BuildMode) $(SharedLibKindMessage) \
- $(LIBRARYNAME)$(SHLIBEXT)
+ $(notdir $@)
$(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO) \
$(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS)
else
-$(LibName.SO): $(ObjectsO) $(LibDir)/.dir
- $(Echo) Linking $(BuildMode) Shared Library $(basename $@)
+$(LibName.SO): $(ObjectsO) $(SharedLibDir)/.dir
+ $(Echo) Linking $(BuildMode) Shared Library $(notdir $@)
$(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO)
endif
@@ -1151,21 +1120,23 @@ uninstall-local::
$(Echo) Uninstall circumvented with NO_INSTALL
else
-ifdef LOADABLE_MODULE
-DestSharedLib = $(DESTDIR)$(PROJ_libdir)/$(LIBRARYNAME)$(SHLIBEXT)
+# Win32.DLL prefers to be located on the "PATH" of binaries.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+DestSharedLibDir := $(DESTDIR)$(PROJ_bindir)
else
-DestSharedLib = $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME)$(SHLIBEXT)
+DestSharedLibDir := $(DESTDIR)$(PROJ_libdir)
endif
+DestSharedLib := $(DestSharedLibDir)/$(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
install-local:: $(DestSharedLib)
-$(DestSharedLib): $(LibName.SO) $(DESTDIR)$(PROJ_libdir)
+$(DestSharedLib): $(LibName.SO) $(DestSharedLibDir)
$(Echo) Installing $(BuildMode) Shared Library $(DestSharedLib)
$(Verb) $(INSTALL) $(LibName.SO) $(DestSharedLib)
uninstall-local::
$(Echo) Uninstalling $(BuildMode) Shared Library $(DestSharedLib)
- -$(Verb) $(RM) -f $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME).*
+ -$(Verb) $(RM) -f $(DestSharedLibDir)/$(SharedPrefix)$(LIBRARYNAME).*
endif
endif
@@ -1341,10 +1312,33 @@ endif
endif
ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD))
+ifneq ($(ARCH), Mips)
LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map
endif
endif
+endif
+
+#---------------------------------------------------------
+# Tool Version Info Support
+#---------------------------------------------------------
+ifeq ($(HOST_OS),Darwin)
+ifdef TOOL_INFO_PLIST
+
+LD.Flags += -Wl,-sectcreate,__TEXT,__info_plist,$(ObjDir)/$(TOOL_INFO_PLIST)
+
+$(ToolBuildPath): $(ObjDir)/$(TOOL_INFO_PLIST)
+
+$(ObjDir)/$(TOOL_INFO_PLIST): $(PROJ_SRC_DIR)/$(TOOL_INFO_PLIST).in $(ObjDir)/.dir
+ $(Echo) "Creating $(TOOLNAME) '$(TOOL_INFO_PLIST)' file..."
+ $(Verb)sed -e "s#@TOOL_INFO_UTI@#$(TOOL_INFO_UTI)#g" \
+ -e "s#@TOOL_INFO_NAME@#$(TOOL_INFO_NAME)#g" \
+ -e "s#@TOOL_INFO_VERSION@#$(TOOL_INFO_VERSION)#g" \
+ -e "s#@TOOL_INFO_BUILD_VERSION@#$(TOOL_INFO_BUILD_VERSION)#g" \
+ $< > $@
+
+endif
+endif
#---------------------------------------------------------
# Provide targets for building the tools
@@ -1377,7 +1371,7 @@ $(ToolAliasBuildPath): $(ToolBuildPath)
$(Echo) Creating $(BuildMode) Alias $(TOOLALIAS) $(StripWarnMsg)
$(Verb) $(RM) -f $(ToolAliasBuildPath)
$(Verb) $(AliasTool) $(TOOLEXENAME) $(ToolAliasBuildPath)
- $(Echo) ======= Finished Creating $(BuildMode) Alias $(TOOLNAME) \
+ $(Echo) ======= Finished Creating $(BuildMode) Alias $(TOOLALIAS) \
$(StripWarnMsg)
endif
@@ -1626,7 +1620,7 @@ ifdef TARGET
TABLEGEN_INC_FILES_COMMON = 1
endif
-ifdef LLVMC_BUILD_AUTOGENERATED_INC
+ifdef LLVMC_BASED_DRIVER
TABLEGEN_INC_FILES_COMMON = 1
endif
@@ -1750,20 +1744,26 @@ clean-local::
endif # TARGET
-ifdef LLVMC_BUILD_AUTOGENERATED_INC
+ifdef LLVMC_BASED_DRIVER
+
+TDSrc := $(sort $(strip $(wildcard $(PROJ_SRC_DIR)/*.td)) \
+ $(strip $(wildcard $(PROJ_OBJ_DIR)/*.td)))
+
+TDCommon := $(strip $(wildcard \
+ $(LLVM_SRC_ROOT)/include/llvm/CompilerDriver/*.td))
-LLVMCPluginSrc := $(sort $(strip $(wildcard $(PROJ_SRC_DIR)/*.td)) \
- $(strip $(wildcard $(PROJ_OBJ_DIR)/*.td)))
+TDFiles := $(TDSrc) $(TDCommon)
-TDFiles := $(LLVMCPluginSrc) \
- $(strip $(wildcard $(LLVM_SRC_ROOT)/include/llvm/CompilerDriver/*.td))
+$(INCTMPFiles) : $(TBLGEN) $(TDFiles)
-$(ObjDir)/AutoGenerated.inc.tmp: $(LLVMCPluginSrc) $(ObjDir)/.dir \
- $(TBLGEN) $(TD_COMMON)
- $(Echo) "Building LLVMC configuration library with tblgen"
+$(ObjDir)/%.inc.tmp: %.td $(ObjDir)/.dir
+ $(Echo) "Building LLVMC compilation graph description with tblgen"
$(Verb) $(TableGen) -gen-llvmc -o $(call SYSPATH, $@) $<
-endif # LLVMC_BUILD_AUTOGENERATED_INC
+clean-local::
+ -$(Verb) $(RM) -f $(INCFiles)
+
+endif # LLVMC_BASED_DRIVER
###############################################################################
# OTHER RULES: Other rules needed
@@ -1840,11 +1840,13 @@ check::
$(EchoCmd) No test directory ; \
fi
-check-lit::
+check-lit:: check
+
+check-dg::
$(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
$(EchoCmd) Running test suite ; \
- $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-lit ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-dg ; \
else \
$(EchoCmd) No Makefile in test directory ; \
fi ; \
diff --git a/README.txt b/README.txt
index 2ebe271b8e51..f54f5bf1b372 100644
--- a/README.txt
+++ b/README.txt
@@ -1,4 +1,4 @@
-Low Level Virtual Machine (LLVM)
+\Low Level Virtual Machine (LLVM)
================================
This directory and its subdirectories contain source code for the Low Level
@@ -13,3 +13,4 @@ assistance with LLVM.
If you're writing a package for LLVM, see docs/Packaging.html for our
suggestions.
+
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index be320cf3981c..de327449e23d 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -31,7 +31,7 @@ dnl===
dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
dnl email address for reporting bugs.
-AC_INIT([[llvm]],[[2.8svn]],[llvmbugs@cs.uiuc.edu])
+AC_INIT([[llvm]],[[2.8rc]],[llvmbugs@cs.uiuc.edu])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
@@ -101,7 +101,6 @@ for i in `ls ${srcdir}/projects`
do
if test -d ${srcdir}/projects/${i} ; then
case ${i} in
- CVS) ;;
sample) AC_CONFIG_SUBDIRS([projects/sample]) ;;
privbracket) AC_CONFIG_SUBDIRS([projects/privbracket]) ;;
llvm-stacker) AC_CONFIG_SUBDIRS([projects/llvm-stacker]) ;;
@@ -299,7 +298,7 @@ dnl Set the LINKALL and NOLINKALL Makefile variables based on the platform
AC_SUBST(LINKALL,$llvm_cv_link_all_option)
AC_SUBST(NOLINKALL,$llvm_cv_no_link_all_option)
-dnl Set the "LLVM_ON_*" variables based on llvm_cvs_platform_type
+dnl Set the "LLVM_ON_*" variables based on llvm_cv_platform_type
dnl This is used by lib/System to determine the basic kind of implementation
dnl to use.
case $llvm_cv_platform_type in
@@ -369,13 +368,13 @@ else
AC_SUBST(LLVM_CROSS_COMPILING, [0])
fi
-dnl Check to see if there's a "CVS" (or .svn or .git) directory indicating
-dnl that this build is being done from a checkout. This sets up several
-dnl defaults for the command line switches. When we build with a CVS directory,
+dnl Check to see if there's a .svn or .git directory indicating that this
+dnl build is being done from a checkout. This sets up several defaults for
+dnl the command line switches. When we build with a checkout directory,
dnl we get a debug with assertions turned on. Without, we assume a source
dnl release and we get an optimized build without assertions.
dnl See --enable-optimized and --enable-assertions below
-if test -d "CVS" -o -d "${srcdir}/CVS" -o -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then
+if test -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then
cvsbuild="yes"
optimize="no"
AC_SUBST(CVSBUILD,[[CVSBUILD=1]])
@@ -392,7 +391,7 @@ dnl===-----------------------------------------------------------------------===
dnl --enable-optimized : check whether they want to do an optimized build:
AC_ARG_ENABLE(optimized, AS_HELP_STRING(
- --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
+ --enable-optimized,[Compile with optimizations enabled (default is YES)]),,enableval="yes")
if test ${enableval} = "no" ; then
AC_SUBST(ENABLE_OPTIMIZED,[[]])
else
@@ -410,7 +409,7 @@ fi
dnl --enable-assertions : check whether they want to turn on assertions or not:
AC_ARG_ENABLE(assertions,AS_HELP_STRING(
- --enable-assertions,[Compile with assertion checks enabled (default is YES)]),, enableval="yes")
+ --enable-assertions,[Compile with assertion checks enabled (default is NO)]),, enableval="no")
if test ${enableval} = "yes" ; then
AC_SUBST(DISABLE_ASSERTIONS,[[]])
else
@@ -544,13 +543,13 @@ TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu, pic16,
- xcore, msp430, systemz, blackfin, cbe, msil, and cpp (default=all)]),,
+ xcore, msp430, systemz, blackfin, cbe, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend MBlaze" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -567,7 +566,6 @@ case "$enableval" in
systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
- msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
@@ -598,9 +596,17 @@ AC_SUBST(TARGETS_TO_BUILD,$TARGETS_TO_BUILD)
# If so, define LLVM_NATIVE_ARCH to that LLVM target.
for a_target in $TARGETS_TO_BUILD; do
if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
- LLVM_NATIVE_ARCHTARGET="${LLVM_NATIVE_ARCH}Target"
- AC_DEFINE_UNQUOTED(LLVM_NATIVE_ARCH,$LLVM_NATIVE_ARCHTARGET,
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_ARCH, $LLVM_NATIVE_ARCH,
[LLVM architecture name for the native architecture, if available])
+ LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
+ LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
+ LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGET, $LLVM_NATIVE_TARGET,
+ [LLVM name for the native Target init function, if available])
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETINFO, $LLVM_NATIVE_TARGETINFO,
+ [LLVM name for the native TargetInfo init function, if available])
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPRINTER, $LLVM_NATIVE_ASMPRINTER,
+ [LLVM name for the native AsmPrinter init function, if available])
fi
done
@@ -857,35 +863,6 @@ AC_ARG_ENABLE(libffi,AS_HELP_STRING(
esac],
llvm_cv_enable_libffi=no)
-dnl Only Windows needs dynamic libCompilerDriver to support plugins.
-if test "$llvm_cv_os_type" = "Win32" ; then
- llvmc_dynamic="yes"
-else
- llvmc_dynamic="no"
-fi
-
-dnl --enable-llvmc-dynamic : should LLVMC link libCompilerDriver dynamically?
-AC_ARG_ENABLE(llvmc-dynamic,AS_HELP_STRING(
---enable-llvmc-dynamic,
-[Link LLVMC dynamically (default is NO, unless on Win32)]),,
-enableval=$llvmc_dynamic)
-if test ${enableval} = "yes" && test "$ENABLE_PIC" -eq 1 ; then
- AC_SUBST(ENABLE_LLVMC_DYNAMIC,[[ENABLE_LLVMC_DYNAMIC=1]])
-else
- AC_SUBST(ENABLE_LLVMC_DYNAMIC,[[]])
-fi
-
-dnl --enable-llvmc-dynamic-plugins : should LLVMC support dynamic plugins?
-AC_ARG_ENABLE(llvmc-dynamic-plugins,AS_HELP_STRING(
---enable-llvmc-dynamic-plugins,
-[Enable dynamic LLVMC plugins (default is YES)]),,
-enableval=yes)
-if test ${enableval} = "yes" ; then
- AC_SUBST(ENABLE_LLVMC_DYNAMIC_PLUGINS,[[ENABLE_LLVMC_DYNAMIC_PLUGINS=1]])
-else
- AC_SUBST(ENABLE_LLVMC_DYNAMIC_PLUGINS,[[]])
-fi
-
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 4: Check for programs we need and that they are the right version
@@ -1011,6 +988,13 @@ fi
dnl Find the install program
AC_PROG_INSTALL
+dnl Prepend src dir to install path dir if it's a relative path
+dnl This is a hack for installs that take place in something other
+dnl than the top level.
+case "$INSTALL" in
+ [[\\/$]]* | ?:[[\\/]]* ) ;;
+ *) INSTALL="\\\$(TOPSRCDIR)/$INSTALL" ;;
+esac
dnl Checks for documentation and testing tools that we can do without. If these
dnl are not found then they are set to "true" which always succeeds but does
@@ -1033,6 +1017,9 @@ AC_PATH_PROGS(OCAMLDEP, [ocamldep])
AC_PATH_PROGS(OCAMLDOC, [ocamldoc])
AC_PATH_PROGS(GAS, [gas as])
+dnl Get the version of the linker in use.
+AC_LINK_GET_VERSION
+
dnl Determine whether the linker supports the -R option.
AC_LINK_USE_R
@@ -1345,6 +1332,9 @@ fi
dnl atomic builtins are required for threading support.
AC_MSG_CHECKING(for GCC atomic builtins)
+dnl Since we'll be using these atomic builtins in C++ files we should test
+dnl the C++ compiler.
+AC_LANG_PUSH([C++])
AC_LINK_IFELSE(
AC_LANG_SOURCE(
[[int main() {
@@ -1356,13 +1346,13 @@ AC_LINK_IFELSE(
return 0;
}
]]),
+ AC_LANG_POP([C++])
AC_MSG_RESULT(yes)
AC_DEFINE(LLVM_MULTITHREADED, 1, Build multithreading support into LLVM),
AC_MSG_RESULT(no)
AC_DEFINE(LLVM_MULTITHREADED, 0, Build multithreading support into LLVM)
AC_MSG_WARN([LLVM will be built thread-unsafe because atomic builtins are missing]))
-
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 9: Additional checks, variables, etc.
@@ -1549,7 +1539,11 @@ dnl WARNING: dnl If you add or remove any of the following config headers, then
dnl you MUST also update Makefile.rules so that the variable FilesToConfig
dnl contains the same list of files as AC_CONFIG_HEADERS below. This ensures the
dnl files can be updated automatically when their *.in sources change.
-AC_CONFIG_HEADERS([include/llvm/Config/config.h])
+AC_CONFIG_HEADERS([include/llvm/Config/config.h include/llvm/Config/llvm-config.h])
+AH_TOP([#ifndef CONFIG_H
+#define CONFIG_H])
+AH_BOTTOM([#endif])
+
AC_CONFIG_FILES([include/llvm/Config/Targets.def])
AC_CONFIG_FILES([include/llvm/Config/AsmPrinters.def])
AC_CONFIG_FILES([include/llvm/Config/AsmParsers.def])
@@ -1563,7 +1557,7 @@ dnl Configure the RPM spec file for LLVM
AC_CONFIG_FILES([llvm.spec])
dnl Configure llvmc's Base plugin
-AC_CONFIG_FILES([tools/llvmc/plugins/Base/Base.td])
+AC_CONFIG_FILES([tools/llvmc/src/Base.td])
dnl Do the first stage of configuration for llvm-config.in.
AC_CONFIG_FILES([tools/llvm-config/llvm-config.in])
diff --git a/autoconf/m4/link_options.m4 b/autoconf/m4/link_options.m4
index b48710c094d9..4c5f2f435d04 100644
--- a/autoconf/m4/link_options.m4
+++ b/autoconf/m4/link_options.m4
@@ -1,4 +1,25 @@
#
+# Get the linker version string.
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_LINK_GET_VERSION],
+ [AC_CACHE_CHECK([for linker version],[llvm_cv_link_version],
+ [
+ version_string="$(ld -v 2>&1 | head -1)"
+
+ # Check for ld64.
+ if (echo "$version_string" | grep -q "ld64"); then
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#")
+ else
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#")
+ fi
+ ])
+ AC_DEFINE_UNQUOTED([HOST_LINK_VERSION],"$llvm_cv_link_version",
+ [Linker version detected at compile time.])
+])
+
+#
# Determine if the system can handle the -R option being passed to the linker.
#
# This macro is specific to LLVM.
diff --git a/bindings/ada/llvm/llvm.ads b/bindings/ada/llvm/llvm.ads
index ce74e670a685..20fc940f8c24 100644
--- a/bindings/ada/llvm/llvm.ads
+++ b/bindings/ada/llvm/llvm.ads
@@ -317,25 +317,27 @@ package llvm is
LLVMGhostLinkage,
LLVMCommonLinkage,
LLVMLinkerPrivateLinkage,
- LLVMLinkerPrivateWeakLinkage);
+ LLVMLinkerPrivateWeakLinkage,
+ LinkerPrivateWeakDefAutoLinkage);
for LLVMLinkage use
- (LLVMExternalLinkage => 0,
- LLVMAvailableExternallyLinkage => 1,
- LLVMLinkOnceAnyLinkage => 2,
- LLVMLinkOnceODRLinkage => 3,
- LLVMWeakAnyLinkage => 4,
- LLVMWeakODRLinkage => 5,
- LLVMAppendingLinkage => 6,
- LLVMInternalLinkage => 7,
- LLVMPrivateLinkage => 8,
- LLVMDLLImportLinkage => 9,
- LLVMDLLExportLinkage => 10,
- LLVMExternalWeakLinkage => 11,
- LLVMGhostLinkage => 12,
- LLVMCommonLinkage => 13,
- LLVMLinkerPrivateLinkage => 14,
- LLVMLinkerPrivateWeakLinkage => 15);
+ (LLVMExternalLinkage => 0,
+ LLVMAvailableExternallyLinkage => 1,
+ LLVMLinkOnceAnyLinkage => 2,
+ LLVMLinkOnceODRLinkage => 3,
+ LLVMWeakAnyLinkage => 4,
+ LLVMWeakODRLinkage => 5,
+ LLVMAppendingLinkage => 6,
+ LLVMInternalLinkage => 7,
+ LLVMPrivateLinkage => 8,
+ LLVMDLLImportLinkage => 9,
+ LLVMDLLExportLinkage => 10,
+ LLVMExternalWeakLinkage => 11,
+ LLVMGhostLinkage => 12,
+ LLVMCommonLinkage => 13,
+ LLVMLinkerPrivateLinkage => 14,
+ LLVMLinkerPrivateWeakLinkage => 15,
+ LinkerPrivateWeakDefAutoLinkage => 16);
pragma Convention (C, LLVMLinkage);
diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml
index 7ab6f51efb9f..462eb201694b 100644
--- a/bindings/ocaml/llvm/llvm.ml
+++ b/bindings/ocaml/llvm/llvm.ml
@@ -35,7 +35,6 @@ module TypeKind = struct
| Opaque
| Vector
| Metadata
- | Union
end
module Linkage = struct
@@ -210,11 +209,6 @@ external struct_element_types : lltype -> lltype array
= "llvm_struct_element_types"
external is_packed : lltype -> bool = "llvm_is_packed"
-(*--... Operations on union types ..........................................--*)
-external union_type : llcontext -> lltype array -> lltype = "llvm_union_type"
-external union_element_types : lltype -> lltype array
- = "llvm_union_element_types"
-
(*--... Operations on pointer, vector, and array types .....................--*)
external array_type : lltype -> int -> lltype = "llvm_array_type"
external pointer_type : lltype -> lltype = "llvm_pointer_type"
@@ -280,6 +274,8 @@ let fold_right_uses f v init =
(*--... Operations on users ................................................--*)
external operand : llvalue -> int -> llvalue = "llvm_operand"
+external set_operand : llvalue -> int -> llvalue -> unit = "llvm_set_operand"
+external num_operands : llvalue -> int = "llvm_num_operands"
(*--... Operations on constants of (mostly) any type .......................--*)
external is_constant : llvalue -> bool = "llvm_is_constant"
@@ -319,7 +315,6 @@ external const_struct : llcontext -> llvalue array -> llvalue
external const_packed_struct : llcontext -> llvalue array -> llvalue
= "llvm_const_packed_struct"
external const_vector : llvalue array -> llvalue = "llvm_const_vector"
-external const_union : lltype -> llvalue -> llvalue = "LLVMConstUnion"
(*--... Constant expressions ...............................................--*)
external align_of : lltype -> llvalue = "LLVMAlignOf"
@@ -1050,9 +1045,6 @@ let rec string_of_lltype ty =
if is_packed ty
then "<" ^ s ^ ">"
else s
- | TypeKind.Union -> "union { " ^ (concat2 ", " (
- Array.map string_of_lltype (union_element_types ty)
- )) ^ " }"
| TypeKind.Array -> "[" ^ (string_of_int (array_length ty)) ^
" x " ^ (string_of_lltype (element_type ty)) ^ "]"
| TypeKind.Vector -> "<" ^ (string_of_int (vector_size ty)) ^
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index 742265cd3d5c..ba3bbe248b71 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -72,7 +72,6 @@ module TypeKind : sig
| Opaque
| Vector
| Metadata
- | Union
end
(** The linkage of a global value, accessed with {!linkage} and
@@ -408,19 +407,6 @@ external struct_element_types : lltype -> lltype array
external is_packed : lltype -> bool = "llvm_is_packed"
-(** {7 Operations on union types} *)
-
-(** [union_type context tys] returns the union type in the context [context]
- containing the types in the array [tys]. See the method
- [llvm::UnionType::get] *)
-external union_type : llcontext -> lltype array -> lltype = "llvm_union_type"
-
-(** [union_element_types uty] returns the constituent types of the union type
- [uty]. See the method [llvm::UnionType::getElementType]. *)
-external union_element_types : lltype -> lltype array
- = "llvm_union_element_types"
-
-
(** {7 Operations on pointer, vector, and array types} *)
(** [array_type ty n] returns the array type containing [n] elements of type
@@ -557,6 +543,14 @@ val fold_right_uses : (lluse -> 'a -> 'a) -> llvalue -> 'a -> 'a
method [llvm::User::getOperand]. *)
external operand : llvalue -> int -> llvalue = "llvm_operand"
+(** [set_operand v i o] sets the operand of the value [v] at the index [i] to
+ the value [o].
+ See the method [llvm::User::setOperand]. *)
+external set_operand : llvalue -> int -> llvalue -> unit = "llvm_set_operand"
+
+(** [num_operands v] returns the number of operands for the value [v].
+ See the method [llvm::User::getNumOperands]. *)
+external num_operands : llvalue -> int = "llvm_num_operands"
(** {7 Operations on constants of (mostly) any type} *)
@@ -689,10 +683,6 @@ external const_packed_struct : llcontext -> llvalue array -> llvalue
values [elts]. See the method [llvm::ConstantVector::get]. *)
external const_vector : llvalue array -> llvalue = "llvm_const_vector"
-(** [const_union ty v] returns the union constant of type [union_type tys] and
- containing the value [v]. See the method [llvm::ConstantUnion::get]. *)
-external const_union : lltype -> llvalue -> llvalue = "LLVMConstUnion"
-
(** {7 Constant expressions} *)
@@ -991,7 +981,7 @@ external const_insertelement : llvalue -> llvalue -> llvalue -> llvalue
= "LLVMConstInsertElement"
(** [const_shufflevector a b mask] returns a constant [shufflevector].
- See the LLVM Language Reference for details on the [sufflevector]
+ See the LLVM Language Reference for details on the [shufflevector]
instruction.
See the method [llvm::ConstantExpr::getShuffleVector]. *)
external const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index c4355ba2dbf1..ef2e3d66629c 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -318,21 +318,6 @@ CAMLprim value llvm_is_packed(LLVMTypeRef StructTy) {
return Val_bool(LLVMIsPackedStruct(StructTy));
}
-/*--... Operations on union types ..........................................--*/
-
-/* llcontext -> lltype array -> lltype */
-CAMLprim LLVMTypeRef llvm_union_type(LLVMContextRef C, value ElementTypes) {
- return LLVMUnionTypeInContext(C, (LLVMTypeRef *) ElementTypes,
- Wosize_val(ElementTypes));
-}
-
-/* lltype -> lltype array */
-CAMLprim value llvm_union_element_types(LLVMTypeRef UnionTy) {
- value Tys = alloc(LLVMCountUnionElementTypes(UnionTy), 0);
- LLVMGetUnionElementTypes(UnionTy, (LLVMTypeRef *) Tys);
- return Tys;
-}
-
/*--... Operations on array, pointer, and vector types .....................--*/
/* lltype -> int -> lltype */
@@ -452,6 +437,17 @@ CAMLprim LLVMValueRef llvm_operand(LLVMValueRef V, value I) {
return LLVMGetOperand(V, Int_val(I));
}
+/* llvalue -> int -> llvalue -> unit */
+CAMLprim value llvm_set_operand(LLVMValueRef U, value I, LLVMValueRef V) {
+ LLVMSetOperand(U, Int_val(I), V);
+ return Val_unit;
+}
+
+/* llvalue -> int */
+CAMLprim value llvm_num_operands(LLVMValueRef V) {
+ return Val_int(LLVMGetNumOperands(V));
+}
+
/*--... Operations on constants of (mostly) any type .......................--*/
/* llvalue -> bool */
@@ -964,8 +960,8 @@ CAMLprim LLVMValueRef llvm_param(LLVMValueRef Fn, value Index) {
return LLVMGetParam(Fn, Int_val(Index));
}
-/* llvalue -> int -> llvalue */
-CAMLprim value llvm_params(LLVMValueRef Fn, value Index) {
+/* llvalue -> llvalue */
+CAMLprim value llvm_params(LLVMValueRef Fn) {
value Params = alloc(LLVMCountParams(Fn), 0);
LLVMGetParams(Fn, (LLVMValueRef *) Op_val(Params));
return Params;
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index d9450d925859..f75e5dfb2656 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -214,6 +214,9 @@ if (LLVM_NATIVE_ARCH)
set(LLVM_NATIVE_ARCH)
else ()
message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}")
+ set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target)
+ set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo)
+ set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter)
endif ()
endif()
@@ -259,12 +262,19 @@ else( ENABLE_THREADS )
message(STATUS "Threads disabled.")
endif()
+set(LLVM_PREFIX ${CMAKE_INSTALL_PREFIX})
+
configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake
${LLVM_BINARY_DIR}/include/llvm/Config/config.h
)
configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake
+ ${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h
+ )
+
+configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/System/DataTypes.h.cmake
${LLVM_BINARY_DIR}/include/llvm/System/DataTypes.h
)
diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt
new file mode 100644
index 000000000000..416d7f478564
--- /dev/null
+++ b/cmake/modules/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(llvm_cmake_builddir "${LLVM_BINARY_DIR}/share/llvm/cmake")
+
+configure_file(
+ LLVM.cmake
+ ${llvm_cmake_builddir}/LLVM.cmake
+ @ONLY)
+
+install(FILES
+ ${llvm_cmake_builddir}/LLVM.cmake
+ LLVMConfig.cmake
+ LLVMLibDeps.cmake
+ DESTINATION share/llvm/cmake)
diff --git a/cmake/modules/ChooseMSVCCRT.cmake b/cmake/modules/ChooseMSVCCRT.cmake
new file mode 100644
index 000000000000..eb78f45c885a
--- /dev/null
+++ b/cmake/modules/ChooseMSVCCRT.cmake
@@ -0,0 +1,106 @@
+# The macro choose_msvc_crt() takes a list of possible
+# C runtimes to choose from, in the form of compiler flags,
+# to present to the user. (MTd for /MTd, etc)
+#
+# The macro is invoked at the end of the file.
+#
+# CMake already sets CRT flags in the CMAKE_CXX_FLAGS_* and
+# CMAKE_C_FLAGS_* variables by default. To let the user
+# override that for each build type:
+# 1. Detect which CRT is already selected, and reflect this in
+# LLVM_USE_CRT_* so the user can have a better idea of what
+# changes they're making.
+# 2. Replace the flags in both variables with the new flag via a regex.
+# 3. set() the variables back into the cache so the changes
+# are user-visible.
+
+### Helper macros: ###
+macro(make_crt_regex regex crts)
+ set(${regex} "")
+ foreach(crt ${${crts}})
+ # Trying to match the beginning or end of the string with stuff
+ # like [ ^]+ didn't work, so use a bunch of parentheses instead.
+ set(${regex} "${${regex}}|(^| +)/${crt}($| +)")
+ endforeach(crt)
+ string(REGEX REPLACE "^\\|" "" ${regex} "${${regex}}")
+endmacro(make_crt_regex)
+
+macro(get_current_crt crt_current regex flagsvar)
+ # Find the selected-by-CMake CRT for each build type, if any.
+ # Strip off the leading slash and any whitespace.
+ string(REGEX MATCH "${${regex}}" ${crt_current} "${${flagsvar}}")
+ string(REPLACE "/" " " ${crt_current} "${${crt_current}}")
+ string(STRIP "${${crt_current}}" ${crt_current})
+endmacro(get_current_crt)
+
+# Replaces or adds a flag to a variable.
+# Expects 'flag' to be padded with spaces.
+macro(set_flag_in_var flagsvar regex flag)
+ string(REGEX MATCH "${${regex}}" current_flag "${${flagsvar}}")
+ if("${current_flag}" STREQUAL "")
+ set(${flagsvar} "${${flagsvar}}${${flag}}")
+ else()
+ string(REGEX REPLACE "${${regex}}" "${${flag}}" ${flagsvar} "${${flagsvar}}")
+ endif()
+ string(STRIP "${${flagsvar}}" ${flagsvar})
+ # Make sure this change gets reflected in the cache/gui.
+ # CMake requires the docstring parameter whenever set() touches the cache,
+ # so get the existing docstring and re-use that.
+ get_property(flagsvar_docs CACHE ${flagsvar} PROPERTY HELPSTRING)
+ set(${flagsvar} "${${flagsvar}}" CACHE STRING "${flagsvar_docs}" FORCE)
+endmacro(set_flag_in_var)
+
+
+macro(choose_msvc_crt MSVC_CRT)
+ if(LLVM_USE_CRT)
+ message(FATAL_ERROR
+ "LLVM_USE_CRT is deprecated. Use the CMAKE_BUILD_TYPE-specific
+variables (LLVM_USE_CRT_DEBUG, etc) instead.")
+ endif()
+
+ make_crt_regex(MSVC_CRT_REGEX ${MSVC_CRT})
+
+ foreach(build_type ${CMAKE_CONFIGURATION_TYPES})
+ string(TOUPPER "${build_type}" build)
+ if (NOT LLVM_USE_CRT_${build})
+ get_current_crt(LLVM_USE_CRT_${build}
+ MSVC_CRT_REGEX
+ CMAKE_CXX_FLAGS_${build})
+ set(LLVM_USE_CRT_${build}
+ "${LLVM_USE_CRT_${build}}"
+ CACHE STRING "Specify VC++ CRT to use for ${build_type} configurations."
+ FORCE)
+ set_property(CACHE LLVM_USE_CRT_${build}
+ PROPERTY STRINGS "";${${MSVC_CRT}})
+ endif(NOT LLVM_USE_CRT_${build})
+ endforeach(build_type)
+
+ foreach(build_type ${CMAKE_CONFIGURATION_TYPES})
+ string(TOUPPER "${build_type}" build)
+ if ("${LLVM_USE_CRT_${build}}" STREQUAL "")
+ set(flag_string " ")
+ else()
+ set(flag_string " /${LLVM_USE_CRT_${build}} ")
+ list(FIND ${MSVC_CRT} ${LLVM_USE_CRT_${build}} idx)
+ if (idx LESS 0)
+ message(FATAL_ERROR
+ "Invalid value for LLVM_USE_CRT_${build}: ${LLVM_USE_CRT_${build}}. Valid options are one of: ${${MSVC_CRT}}")
+ endif (idx LESS 0)
+ message(STATUS "Using ${build_type} VC++ CRT: ${LLVM_USE_CRT_${build}}")
+ endif()
+ foreach(lang C CXX)
+ set_flag_in_var(CMAKE_${lang}_FLAGS_${build} MSVC_CRT_REGEX flag_string)
+ endforeach(lang)
+ endforeach(build_type)
+endmacro(choose_msvc_crt MSVC_CRT)
+
+
+# List of valid CRTs for MSVC
+set(MSVC_CRT
+ MD
+ MDd
+ MT
+ MTd)
+
+choose_msvc_crt(MSVC_CRT)
+
diff --git a/cmake/modules/LLVM.cmake b/cmake/modules/LLVM.cmake
new file mode 100644
index 000000000000..9621454f4119
--- /dev/null
+++ b/cmake/modules/LLVM.cmake
@@ -0,0 +1,29 @@
+# This file provides information and services to the final user.
+
+set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@)
+
+set(llvm_libs @llvm_libs@)
+
+set(llvm_lib_targets @llvm_lib_targets@)
+
+set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@)
+
+set(LLVM_TOOLS_BINARY_DIR @LLVM_TOOLS_BINARY_DIR@)
+
+set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
+
+set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
+
+# We try to include using the current setting of CMAKE_MODULE_PATH,
+# which suppossedly was filled by the user with the directory where
+# this file was installed:
+include( LLVMConfig OPTIONAL RESULT_VARIABLE LLVMCONFIG_INCLUDED )
+
+# If failed, we assume that this is an un-installed build:
+if( NOT LLVMCONFIG_INCLUDED )
+ set(CMAKE_MODULE_PATH
+ ${CMAKE_MODULE_PATH}
+ "@LLVM_SOURCE_DIR@/cmake/modules")
+ include( LLVMConfig )
+endif()
+
diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVMConfig.cmake
index 0744b50d648f..e5497084be84 100755
--- a/cmake/modules/LLVMConfig.cmake
+++ b/cmake/modules/LLVMConfig.cmake
@@ -16,6 +16,26 @@ function(get_system_libs return_var)
endfunction(get_system_libs)
+function(is_llvm_target_library library return_var)
+ # Sets variable `return_var' to ON if `library' corresponds to a
+ # LLVM supported target. To OFF if it doesn't.
+ set(${return_var} OFF PARENT_SCOPE)
+ string(TOUPPER "${library}" capitalized_lib)
+ string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
+ foreach(t ${targets})
+ if( capitalized_lib STREQUAL "LLVM${t}" OR
+ capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR
+ capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR
+ capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR
+ capitalized_lib STREQUAL "LLVM${t}DISASSEMBLER" OR
+ capitalized_lib STREQUAL "LLVM${t}INFO" )
+ set(${return_var} ON PARENT_SCOPE)
+ break()
+ endif()
+ endforeach()
+endfunction(is_llvm_target_library)
+
+
macro(llvm_config executable)
explicit_llvm_config(${executable} ${ARGN})
endmacro(llvm_config)
@@ -29,6 +49,14 @@ function(explicit_llvm_config executable)
endfunction(explicit_llvm_config)
+# This is a variant intended for the final user:
+function(llvm_map_components_to_libraries OUT_VAR)
+ explicit_map_components_to_libraries(result ${ARGN})
+ get_system_libs(sys_result)
+ set( ${OUT_VAR} ${result} ${sys_result} PARENT_SCOPE )
+endfunction(llvm_map_components_to_libraries)
+
+
function(explicit_map_components_to_libraries out_libs)
set( link_components ${ARGN} )
foreach(c ${link_components})
@@ -86,16 +114,24 @@ function(explicit_map_components_to_libraries out_libs)
list(GET expanded_components 0 c)
string(TOUPPER "${c}" capitalized)
list(FIND capitalized_libs ${capitalized} idx)
+ set(add_it ON)
if( idx LESS 0 )
- message(FATAL_ERROR "Library ${c} not found in list of llvm libraries.")
+ # The library is unkown. Maybe is an ommitted target?
+ is_llvm_target_library(${c} iltl_result)
+ if( NOT iltl_result )
+ message(FATAL_ERROR "Library ${c} not found in list of llvm libraries.")
+ endif()
+ set(add_it OFF)
endif( idx LESS 0 )
list(GET llvm_libs ${idx} canonical_lib)
list(REMOVE_ITEM result ${canonical_lib})
- list(APPEND result ${canonical_lib})
foreach(c ${MSVC_LIB_DEPS_${canonical_lib}})
list(REMOVE_ITEM expanded_components ${c})
endforeach()
- list(APPEND expanded_components ${MSVC_LIB_DEPS_${canonical_lib}})
+ if( add_it )
+ list(APPEND result ${canonical_lib})
+ list(APPEND expanded_components ${MSVC_LIB_DEPS_${canonical_lib}})
+ endif()
list(REMOVE_AT expanded_components 0)
list(LENGTH expanded_components lst_size)
endwhile( 0 LESS ${lst_size} )
@@ -115,13 +151,13 @@ endfunction(explicit_map_components_to_libraries)
# The format generated by GenLibDeps.pl
-# LLVMARMAsmPrinter.o: LLVMARMCodeGen.o libLLVMAsmPrinter.a libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMTarget.a
+# libLLVMARMAsmPrinter.a: libLLVMMC.a libLLVMSupport.a
# is translated to:
-# set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)
+# set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMMC LLVMSupport)
-# It is necessary to remove the `lib' prefix and the `.a'.
+# It is necessary to remove the `lib' prefix and the `.a' suffix.
# This 'sed' script should do the trick:
# sed -e s'#\.a##g' -e 's#libLLVM#LLVM#g' -e 's#: # #' -e 's#\(.*\)#set(MSVC_LIB_DEPS_\1)#' ~/llvm/tools/llvm-config/LibDeps.txt
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index c9798485dec6..e639b04e9800 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -1,6 +1,6 @@
-set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMCParser LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMARMInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMInfo LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMARMInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMAlphaAsmPrinter LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
@@ -8,9 +8,9 @@ set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMMCParser LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport)
set(MSVC_LIB_DEPS_LLVMBlackfinAsmPrinter LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMSupport)
@@ -27,29 +27,28 @@ set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMSystem LLVMTarget
set(MSVC_LIB_DEPS_LLVMInstCombine LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMJIT LLVMAnalysis LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMSystem LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMMBlazeAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMCodeGen LLVMCore LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMBlazeInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMC LLVMSupport LLVMSystem)
+set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMInfo LLVMAlphaAsmPrinter LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinAsmPrinter LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUAsmPrinter LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmPrinter LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430AsmPrinter LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsAsmPrinter LLVMMipsCodeGen LLVMMipsInfo LLVMPIC16AsmPrinter LLVMPIC16CodeGen LLVMPIC16Info LLVMPowerPCAsmPrinter LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcAsmPrinter LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystem LLVMSystemZAsmPrinter LLVMSystemZCodeGen LLVMSystemZInfo LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreAsmPrinter LLVMXCoreCodeGen LLVMXCoreInfo)
set(MSVC_LIB_DEPS_LLVMMCParser LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMSIL LLVMAnalysis LLVMCodeGen LLVMCore LLVMMSILInfo LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
-set(MSVC_LIB_DEPS_LLVMMSILInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsCodeGen LLVMMipsInfo LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPIC16 LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16 LLVMPIC16Info LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPIC16AsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPIC16CodeGen LLVMPIC16Info LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPIC16CodeGen LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMPIC16Info LLVMSelectionDAG LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPIC16Info LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSparcAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSparcInfo LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMSystem LLVMTarget)
set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMSupport)
@@ -61,12 +60,12 @@ set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMipa)
set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMMCParser LLVMSupport LLVMTarget LLVMX86Info)
-set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMX86CodeGen LLVMX86Info)
-set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86Info)
+set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystem LLVMTarget LLVMX86AsmPrinter LLVMX86Info)
set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info)
set(MSVC_LIB_DEPS_LLVMX86Info LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMXCore LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo)
set(MSVC_LIB_DEPS_LLVMXCoreAsmPrinter LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo)
set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport LLVMSystem)
-set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMSystem LLVMTarget LLVMTransformUtils LLVMipa)
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
new file mode 100644
index 000000000000..1016df22590d
--- /dev/null
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -0,0 +1,33 @@
+# Adds version control information to the variable VERS. For
+# determining the Version Control System used (if any) it inspects the
+# existence of certain subdirectories under CMAKE_CURRENT_SOURCE_DIR.
+
+function(add_version_info_from_vcs VERS)
+ set(result ${${VERS}})
+ if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.svn )
+ set(result "${result}svn")
+ find_package(Subversion)
+ if( Subversion_FOUND )
+ subversion_wc_info( ${CMAKE_CURRENT_SOURCE_DIR} Project )
+ if( Project_WC_REVISION )
+ set(result "${result}-r${Project_WC_REVISION}")
+ endif()
+ endif()
+ elseif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git )
+ set(result "${result}git")
+ # Try to get a ref-id
+ find_program(git_executable NAMES git git.exe git.cmd)
+ if( git_executable )
+ execute_process(COMMAND ${git_executable} show-ref HEAD
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ TIMEOUT 5
+ RESULT_VARIABLE git_result
+ OUTPUT_VARIABLE git_output)
+ if( git_result EQUAL 0 )
+ string(SUBSTRING ${git_output} 0 7 git_ref_id)
+ set(result "${result}-${git_ref_id}")
+ endif()
+ endif()
+ endif()
+ set(${VERS} ${result} PARENT_SCOPE)
+endfunction(add_version_info_from_vcs)
diff --git a/configure b/configure
index dc1b5b3fbe32..d2b98bfe661f 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for llvm 2.8svn.
+# Generated by GNU Autoconf 2.60 for llvm 2.8rc.
#
# Report bugs to <llvmbugs@cs.uiuc.edu>.
#
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
PACKAGE_NAME='llvm'
PACKAGE_TARNAME='-llvm-'
-PACKAGE_VERSION='2.8svn'
-PACKAGE_STRING='llvm 2.8svn'
+PACKAGE_VERSION='2.8rc'
+PACKAGE_STRING='llvm 2.8rc'
PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu'
ac_unique_file="lib/VMCore/Module.cpp"
@@ -703,8 +703,6 @@ ENABLE_BUILT_CLANG
OPTIMIZE_OPTION
EXTRA_OPTIONS
BINUTILS_INCDIR
-ENABLE_LLVMC_DYNAMIC
-ENABLE_LLVMC_DYNAMIC_PLUGINS
CXX
CXXFLAGS
ac_ct_CXX
@@ -1320,7 +1318,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures llvm 2.8svn to adapt to many kinds of systems.
+\`configure' configures llvm 2.8rc to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1386,7 +1384,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of llvm 2.8svn:";;
+ short | recursive ) echo "Configuration of llvm 2.8rc:";;
esac
cat <<\_ACEOF
@@ -1416,17 +1414,13 @@ Optional Features:
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, alpha, arm, mips, spu,
- pic16, xcore, msp430, systemz, blackfin, cbe, msil,
- and cpp (default=all)
+ pic16, xcore, msp430, systemz, blackfin, cbe, and
+ cpp (default=all)
--enable-cbe-printf-a Enable C Backend output with hex floating point via
%a (default is YES)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is NO)
- --enable-llvmc-dynamic Link LLVMC dynamically (default is NO, unless on
- Win32)
- --enable-llvmc-dynamic-plugins
- Enable dynamic LLVMC plugins (default is YES)
--enable-ltdl-install install libltdl
Optional Packages:
@@ -1539,7 +1533,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-llvm configure 2.8svn
+llvm configure 2.8rc
generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1555,7 +1549,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by llvm $as_me 2.8svn, which was
+It was created by llvm $as_me 2.8rc, which was
generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1988,7 +1982,6 @@ for i in `ls ${srcdir}/projects`
do
if test -d ${srcdir}/projects/${i} ; then
case ${i} in
- CVS) ;;
sample) subdirs="$subdirs projects/sample"
;;
privbracket) subdirs="$subdirs projects/privbracket"
@@ -4691,7 +4684,7 @@ else
fi
-if test -d "CVS" -o -d "${srcdir}/CVS" -o -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then
+if test -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then
cvsbuild="yes"
optimize="no"
CVSBUILD=CVSBUILD=1
@@ -4706,7 +4699,7 @@ fi
if test "${enable_optimized+set}" = set; then
enableval=$enable_optimized;
else
- enableval=$optimize
+ enableval="yes"
fi
if test ${enableval} = "no" ; then
@@ -4736,7 +4729,7 @@ fi
if test "${enable_assertions+set}" = set; then
enableval=$enable_assertions;
else
- enableval="yes"
+ enableval="no"
fi
if test ${enableval} = "yes" ; then
@@ -4962,7 +4955,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend MBlaze" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@@ -4979,7 +4972,6 @@ case "$enableval" in
systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
- msil) TARGETS_TO_BUILD="MSIL $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
@@ -5015,10 +5007,27 @@ TARGETS_TO_BUILD=$TARGETS_TO_BUILD
# If so, define LLVM_NATIVE_ARCH to that LLVM target.
for a_target in $TARGETS_TO_BUILD; do
if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
- LLVM_NATIVE_ARCHTARGET="${LLVM_NATIVE_ARCH}Target"
cat >>confdefs.h <<_ACEOF
-#define LLVM_NATIVE_ARCH $LLVM_NATIVE_ARCHTARGET
+#define LLVM_NATIVE_ARCH $LLVM_NATIVE_ARCH
+_ACEOF
+
+ LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
+ LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
+ LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_TARGET $LLVM_NATIVE_TARGET
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_TARGETINFO $LLVM_NATIVE_TARGETINFO
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_ASMPRINTER $LLVM_NATIVE_ASMPRINTER
_ACEOF
fi
@@ -5374,42 +5383,6 @@ else
fi
-if test "$llvm_cv_os_type" = "Win32" ; then
- llvmc_dynamic="yes"
-else
- llvmc_dynamic="no"
-fi
-
-# Check whether --enable-llvmc-dynamic was given.
-if test "${enable_llvmc_dynamic+set}" = set; then
- enableval=$enable_llvmc_dynamic;
-else
- enableval=$llvmc_dynamic
-fi
-
-if test ${enableval} = "yes" && test "$ENABLE_PIC" -eq 1 ; then
- ENABLE_LLVMC_DYNAMIC=ENABLE_LLVMC_DYNAMIC=1
-
-else
- ENABLE_LLVMC_DYNAMIC=
-
-fi
-
-# Check whether --enable-llvmc-dynamic-plugins was given.
-if test "${enable_llvmc_dynamic_plugins+set}" = set; then
- enableval=$enable_llvmc_dynamic_plugins;
-else
- enableval=yes
-fi
-
-if test ${enableval} = "yes" ; then
- ENABLE_LLVMC_DYNAMIC_PLUGINS=ENABLE_LLVMC_DYNAMIC_PLUGINS=1
-
-else
- ENABLE_LLVMC_DYNAMIC_PLUGINS=
-
-fi
-
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
@@ -8004,6 +7977,10 @@ test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+case "$INSTALL" in
+ [\\/$]* | ?:[\\/]* ) ;;
+ *) INSTALL="\\\$(TOPSRCDIR)/$INSTALL" ;;
+esac
# Extract the first word of "bzip2", so it can be a program name with args.
set dummy bzip2; ac_word=$2
@@ -8721,6 +8698,31 @@ fi
done
+{ echo "$as_me:$LINENO: checking for linker version" >&5
+echo $ECHO_N "checking for linker version... $ECHO_C" >&6; }
+if test "${llvm_cv_link_version+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+
+ version_string="$(ld -v 2>&1 | head -1)"
+
+ # Check for ld64.
+ if (echo "$version_string" | grep -q "ld64"); then
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#")
+ else
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#")
+ fi
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_link_version" >&5
+echo "${ECHO_T}$llvm_cv_link_version" >&6; }
+
+cat >>confdefs.h <<_ACEOF
+#define HOST_LINK_VERSION "$llvm_cv_link_version"
+_ACEOF
+
+
+
{ echo "$as_me:$LINENO: checking for compiler -Wl,-R<path> option" >&5
echo $ECHO_N "checking for compiler -Wl,-R<path> option... $ECHO_C" >&6; }
if test "${llvm_cv_link_use_r+set}" = set; then
@@ -11387,7 +11389,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 11390 "configure"
+#line 11392 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -19991,6 +19993,12 @@ fi
{ echo "$as_me:$LINENO: checking for GCC atomic builtins" >&5
echo $ECHO_N "checking for GCC atomic builtins... $ECHO_C" >&6; }
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
@@ -20041,6 +20049,12 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
ac_status=$?
echo "$as_me:$LINENO: \$? = $ac_status" >&5
(exit $ac_status); }; }; then
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
{ echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6; }
@@ -20067,7 +20081,6 @@ rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-
if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then
{ echo "$as_me:$LINENO: checking for 32-bit userspace on 64-bit system" >&5
echo $ECHO_N "checking for 32-bit userspace on 64-bit system... $ECHO_C" >&6; }
@@ -20563,7 +20576,12 @@ fi
-ac_config_headers="$ac_config_headers include/llvm/Config/config.h"
+ac_config_headers="$ac_config_headers include/llvm/Config/config.h include/llvm/Config/llvm-config.h"
+
+
+
+
+
ac_config_files="$ac_config_files include/llvm/Config/Targets.def"
@@ -20582,7 +20600,7 @@ ac_config_files="$ac_config_files Makefile.config"
ac_config_files="$ac_config_files llvm.spec"
-ac_config_files="$ac_config_files tools/llvmc/plugins/Base/Base.td"
+ac_config_files="$ac_config_files tools/llvmc/src/Base.td"
ac_config_files="$ac_config_files tools/llvm-config/llvm-config.in"
@@ -21027,7 +21045,7 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by llvm $as_me 2.8svn, which was
+This file was extended by llvm $as_me 2.8rc, which was
generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -21080,7 +21098,7 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-llvm config.status 2.8svn
+llvm config.status 2.8rc
configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
@@ -21194,6 +21212,7 @@ for ac_config_target in $ac_config_targets
do
case $ac_config_target in
"include/llvm/Config/config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Config/config.h" ;;
+ "include/llvm/Config/llvm-config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Config/llvm-config.h" ;;
"include/llvm/Config/Targets.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Targets.def" ;;
"include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;;
"include/llvm/Config/AsmParsers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmParsers.def" ;;
@@ -21201,7 +21220,7 @@ do
"include/llvm/System/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/System/DataTypes.h" ;;
"Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;;
"llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;;
- "tools/llvmc/plugins/Base/Base.td") CONFIG_FILES="$CONFIG_FILES tools/llvmc/plugins/Base/Base.td" ;;
+ "tools/llvmc/src/Base.td") CONFIG_FILES="$CONFIG_FILES tools/llvmc/src/Base.td" ;;
"tools/llvm-config/llvm-config.in") CONFIG_FILES="$CONFIG_FILES tools/llvm-config/llvm-config.in" ;;
"setup") CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;;
"Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile" ;;
@@ -21421,8 +21440,6 @@ ENABLE_BUILT_CLANG!$ENABLE_BUILT_CLANG$ac_delim
OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim
EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
-ENABLE_LLVMC_DYNAMIC!$ENABLE_LLVMC_DYNAMIC$ac_delim
-ENABLE_LLVMC_DYNAMIC_PLUGINS!$ENABLE_LLVMC_DYNAMIC_PLUGINS$ac_delim
CXX!$CXX$ac_delim
CXXFLAGS!$CXXFLAGS$ac_delim
ac_ct_CXX!$ac_ct_CXX$ac_delim
@@ -21514,7 +21531,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 93; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html
index a23d90848e41..cffaa8206228 100644
--- a/docs/AliasAnalysis.html
+++ b/docs/AliasAnalysis.html
@@ -238,10 +238,10 @@ a location, ModRef is returned.</p>
<p>The <tt>AliasAnalysis</tt> class also provides a <tt>getModRefInfo</tt>
method for testing dependencies between function calls. This method takes two
-call sites (CS1 &amp; CS2), returns NoModRef if the two calls refer to disjoint
-memory locations, Ref if CS1 reads memory written by CS2, Mod if CS1 writes to
-memory read or written by CS2, or ModRef if CS1 might read or write memory
-accessed by CS2. Note that this relation is not commutative.</p>
+call sites (CS1 &amp; CS2), returns NoModRef if neither call writes to memory
+read or written by the other, Ref if CS1 reads memory written by CS2, Mod if CS1
+writes to memory read or written by CS2, or ModRef if CS1 might read or write
+memory written to by CS2. Note that this relation is not commutative.</p>
</div>
@@ -998,7 +998,7 @@ analysis directly.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-07-07 16:27:09 +0200 (Wed, 07 Jul 2010) $
+ Last modified: $Date: 2010-08-31 01:47:24 +0200 (Tue, 31 Aug 2010) $
</address>
</body>
diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html
index f1f175d70553..bd53a1edd76c 100644
--- a/docs/BitCodeFormat.html
+++ b/docs/BitCodeFormat.html
@@ -1367,21 +1367,6 @@ type to the type table.
</p>
</div>
-<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"><a name="TYPE_CODE_UNION">TYPE_CODE_UNION Record</a>
-</div>
-
-<div class="doc_text">
-
-<p><tt>[UNION, ...eltty...]</tt></p>
-
-<p>The <tt>UNION</tt> record (code 17) adds a <tt>union</tt> type to
-the type table. The <i>eltty</i> operand fields are zero or more type
-indices representing the element types of the union.
-</p>
-
-</div>
-
<!-- ======================================================================= -->
<div class="doc_subsection"><a name="CONSTANTS_BLOCK">CONSTANTS_BLOCK Contents</a>
</div>
@@ -1489,7 +1474,7 @@ name. Each entry corresponds to a single named type.
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2010-05-22 00:20:54 +0200 (Sat, 22 May 2010) $
+Last modified: $Date: 2010-08-28 06:09:24 +0200 (Sat, 28 Aug 2010) $
</address>
</body>
</html>
diff --git a/docs/CMake.html b/docs/CMake.html
index 40a2cec8e91b..ca0b50f628e9 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -313,9 +313,15 @@
<div class="doc_text">
-<p>LLVM testing is not supported on Visual Studio.</p>
+<p>Testing is performed when the <i>check</i> target is built. For
+ instance, if you are using makefiles, execute this command while on
+ the top level of your build directory:</p>
-<p>TODO</p>
+<div class="doc_code">
+ <p><tt>make check</tt></p>
+</div>
+
+<p>Testing is not supported on Visual Studio.</p>
</div>
@@ -348,7 +354,38 @@
<div class="doc_text">
-<p>TODO</p>
+ <p>The most difficult part of adding LLVM to the build of a project
+ is to determine the set of LLVM libraries corresponding to the set
+ of required LLVM features. What follows is an example of how to
+ obtain this information:</p>
+
+ <div class="doc_code">
+ <pre>
+ <b># A convenience variable:</b>
+ set(LLVM_ROOT "" CACHE PATH "Root of LLVM install.")
+ <b># A bit of a sanity check:</b>
+ if( NOT EXISTS ${LLVM_ROOT}/include/llvm )
+ message(FATAL_ERROR "LLVM_ROOT (${LLVM_ROOT}) is not a valid LLVM install")
+ endif()
+ <b># We incorporate the CMake features provided by LLVM:</b>
+ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${LLVM_ROOT}/share/llvm/cmake")
+ include(LLVM)
+ <b># Now set the header and library paths:</b>
+ include_directories( ${LLVM_ROOT}/include )
+ link_directories( ${LLVM_ROOT}/lib )
+ <b># Let's suppose we want to build a JIT compiler with support for
+ # binary code (no interpreter):</b>
+ llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native)
+ <b># Finally, we link the LLVM libraries to our executable:</b>
+ target_link_libraries(mycompiler ${REQ_LLVM_LIBRARIES})
+ </pre>
+ </div>
+
+ <p>This assumes that LLVM_ROOT points to an install of LLVM. The
+ procedure works too for uninstalled builds although we need to take
+ care to add an <i>include_directories</i> for the location of the
+ headers on the LLVM source directory (if we are building
+ out-of-source.)</p>
</div>
@@ -377,7 +414,7 @@
<a href="mailto:ofv@wanadoo.es">Oscar Fuentes</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2008-12-31 03:59:36 +0100 (Wed, 31 Dec 2008) $
+ Last modified: $Date: 2010-08-09 03:59:36 +0100 (Mon, 9 Aug 2010) $
</address>
</body>
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 407178777a34..4b2e261094bd 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -1457,8 +1457,8 @@ bool RegMapping_Fer::compatible_class(MachineFunction &amp;mf,
order to get and store values in memory. To assign a physical register to a
virtual register present in a given operand,
use <tt>MachineOperand::setReg(p_reg)</tt>. To insert a store instruction,
- use <tt>TargetRegisterInfo::storeRegToStackSlot(...)</tt>, and to insert a
- load instruction, use <tt>TargetRegisterInfo::loadRegFromStackSlot</tt>.</p>
+ use <tt>TargetInstrInfo::storeRegToStackSlot(...)</tt>, and to insert a
+ load instruction, use <tt>TargetInstrInfo::loadRegFromStackSlot</tt>.</p>
<p>The indirect mapping shields the application developer from the complexities
of inserting load and store instructions. In order to map a virtual register
@@ -2162,7 +2162,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-06-15 23:58:33 +0200 (Tue, 15 Jun 2010) $
+ Last modified: $Date: 2010-09-01 00:01:07 +0200 (Wed, 01 Sep 2010) $
</address>
</body>
diff --git a/docs/CommandGuide/bugpoint.pod b/docs/CommandGuide/bugpoint.pod
index 7afeea1aba5a..1870a0d84b60 100644
--- a/docs/CommandGuide/bugpoint.pod
+++ b/docs/CommandGuide/bugpoint.pod
@@ -67,6 +67,10 @@ tool.
Pass all arguments specified after B<--gcc-tool-args> to the invocation of
B<gcc>.
+=item B<--opt-args> I<opt args>
+
+Pass all arguments specified after B<--opt-args> to the invocation of B<opt>.
+
=item B<--disable-{dce,simplifycfg}>
Do not run the specified passes to clean up and reduce the size of the test
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
index 62cb7765a47d..67f0cfc1a110 100644
--- a/docs/CommandGuide/index.html
+++ b/docs/CommandGuide/index.html
@@ -72,6 +72,9 @@ options) arguments to the tool you are interested in.</p>
<li><a href="/cmds/llvmc.html"><b>llvmc</b></a> -
a generic customizable compiler driver</li>
+<li><a href="/cmds/llvm-diff.html"><b>llvm-diff</b></a> -
+ structurally compare two modules</li>
+
</ul>
</div>
@@ -148,7 +151,7 @@ options) arguments to the tool you are interested in.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-05-11 18:47:42 +0200 (Tue, 11 May 2010) $
+ Last modified: $Date: 2010-09-08 01:32:02 +0200 (Wed, 08 Sep 2010) $
</address>
</body>
diff --git a/docs/CommandGuide/llvm-diff.pod b/docs/CommandGuide/llvm-diff.pod
new file mode 100644
index 000000000000..c8cfdb3be94d
--- /dev/null
+++ b/docs/CommandGuide/llvm-diff.pod
@@ -0,0 +1,53 @@
+=pod
+
+=head1 NAME
+
+llvm-diff - LLVM structural 'diff'
+
+=head1 SYNOPSIS
+
+B<llvm-diff> [I<options>] I<module 1> I<module 2> [I<global name ...>]
+
+=head1 DESCRIPTION
+
+B<llvm-diff> compares the structure of two LLVM modules, primarily
+focusing on differences in function definitions. Insignificant
+differences, such as changes in the ordering of globals or in the
+names of local values, are ignored.
+
+An input module will be interpreted as an assembly file if its name
+ends in '.ll'; otherwise it will be read in as a bitcode file.
+
+If a list of global names is given, just the values with those names
+are compared; otherwise, all global values are compared, and
+diagnostics are produced for globals which only appear in one module
+or the other.
+
+B<llvm-diff> compares two functions by comparing their basic blocks,
+beginning with the entry blocks. If the terminators seem to match,
+then the corresponding successors are compared; otherwise they are
+ignored. This algorithm is very sensitive to changes in control flow,
+which tend to stop any downstream changes from being detected.
+
+B<llvm-diff> is intended as a debugging tool for writers of LLVM
+passes and frontends. It does not have a stable output format.
+
+=head1 EXIT STATUS
+
+If B<llvm-diff> finds no differences between the modules, it will exit
+with 0 and produce no output. Otherwise it will exit with a non-zero
+value.
+
+=head1 BUGS
+
+Many important differences, like changes in linkage or function
+attributes, are not diagnosed.
+
+Changes in memory behavior (for example, coalescing loads) can cause
+massive detected differences in blocks.
+
+=head1 AUTHORS
+
+Maintained by the LLVM Team (L<http://llvm.org>).
+
+=cut
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
index 37bfb8990e01..47352009ea04 100644
--- a/docs/DeveloperPolicy.html
+++ b/docs/DeveloperPolicy.html
@@ -43,7 +43,8 @@
is to eliminate miscommunication, rework, and confusion that might arise from
the distributed nature of LLVM's development. By stating the policy in clear
terms, we hope each developer can know ahead of time what to expect when
- making LLVM contributions.</p>
+ making LLVM contributions. This policy covers all llvm.org subprojects,
+ including Clang, LLDB, etc.</p>
<p>This policy is also designed to accomplish the following objectives:</p>
<ol>
@@ -77,17 +78,28 @@
<!-- _______________________________________________________________________ -->
<div class="doc_subsection"> <a name="informed">Stay Informed</a> </div>
<div class="doc_text">
-<p>Developers should stay informed by reading at least the
- <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a> email
- list. If you are doing anything more than just casual work on LLVM, it is
- suggested that you also subscribe to the
- <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">llvm-commits</a>
- list and pay attention to changes being made by others.</p>
+<p>Developers should stay informed by reading at least the "dev" mailing list
+ for the projects you are interested in, such as
+ <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev</a> for
+ LLVM, <a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev">cfe-dev</a>
+ for Clang, or <a
+ href="http://lists.cs.uiuc.edu/mailman/listinfo/lldb-dev">lldb-dev</a>
+ for LLDB. If you are doing anything more than just casual work on LLVM, it
+ is suggested that you also subscribe to the "commits" mailing list for the
+ subproject you're interested in, such as
+ <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits">llvm-commits</a>,
+ <a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits">cfe-commits</a>,
+ or <a href="http://lists.cs.uiuc.edu/mailman/listinfo/lldb-commits">lldb-commits</a>.
+ Reading the "commits" list and paying attention to changes being made by
+ others is a good way to see what other people are interested in and watching
+ the flow of the project as a whole.</p>
<p>We recommend that active developers register an email account with
<a href="http://llvm.org/bugs/">LLVM Bugzilla</a> and preferably subscribe to
the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmbugs">llvm-bugs</a>
- email list to keep track of bugs and enhancements occurring in LLVM.</p>
+ email list to keep track of bugs and enhancements occurring in LLVM. We
+ really appreciate people who are proactive at catching incoming bugs in their
+ components and dealing with them promptly.</p>
</div>
<!-- _______________________________________________________________________ -->
@@ -107,18 +119,13 @@
patches may not apply correctly if the underlying code changes between the
time the patch was created and the time it is applied.</li>
- <li>Patches should be made with this command:
-<div class="doc_code">
-<pre>
-svn diff
-</pre>
-</div>
- or with the utility <tt>utils/mkpatch</tt>, which makes it easy to read
- the diff.</li>
+ <li>Patches should be made with <tt>svn diff</tt>, or similar. If you use
+ a different tool, make sure it uses the <tt>diff -u</tt> format and
+ that it doesn't contain clutter which makes it hard to read.</li>
- <li>Patches should not include differences in generated code such as the code
- generated by <tt>autoconf</tt> or <tt>tblgen</tt>. The
- <tt>utils/mkpatch</tt> utility takes care of this for you.</li>
+ <li>If you are modifying generated files, such as the top-level
+ <tt>configure</tt> script, please separate out those changes into
+ a separate patch from the rest of your changes.</li>
</ol>
<p>When sending a patch to a mailing list, it is a good idea to send it as an
@@ -239,8 +246,9 @@ svn diff
them short.</li>
</ol>
-<p>Note that llvm/test is designed for regression and small feature tests
- only. More extensive test cases (e.g., entire applications, benchmarks, etc)
+<p>Note that llvm/test and clang/test are designed for regression and small
+ feature tests only. More extensive test cases (e.g., entire applications,
+ benchmarks, etc)
should be added to the <tt>llvm-test</tt> test suite. The llvm-test suite is
for coverage (correctness, performance, etc) testing, not feature or
regression testing.</p>
@@ -263,7 +271,7 @@ svn diff
testcase</a> so we know if the fix/feature ever regresses in the
future.</li>
- <li>Code must pass the dejagnu (<tt>llvm/test</tt>) test suite.</li>
+ <li>Code must pass the <tt>llvm/test</tt> test suite.</li>
<li>The code must not cause regressions on a reasonable subset of llvm-test,
where "reasonable" depends on the contributor's judgement and the scope of
@@ -601,7 +609,7 @@ Changes</a></div>
Written by the
<a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+ Last modified: $Date: 2010-09-02 02:09:17 +0200 (Thu, 02 Sep 2010) $
</address>
</body>
</html>
diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html
index f4eccf79a2a5..0b2827c9d5c1 100644
--- a/docs/GCCFEBuildInstrs.html
+++ b/docs/GCCFEBuildInstrs.html
@@ -84,7 +84,7 @@ top-level <tt>README.LLVM</tt> file, adding ",ada" to EXTRALANGS, for example:
<li><p>The build requires having a compiler that supports Ada, C and C++.
The Ada front-end is written in Ada so an Ada compiler is needed to
build it. Compilers known to work with the
- <a href="http://llvm.org/releases/download.html">LLVM 2.5 release</a>
+ <a href="http://llvm.org/releases/download.html">LLVM 2.7 release</a>
are <a href="http://gcc.gnu.org/releases.html">gcc-4.2</a> and the
2005, 2006 and 2007 versions of the
<a href="http://libre.adacore.com/">GNAT GPL Edition</a>.
@@ -116,9 +116,9 @@ top-level <tt>README.LLVM</tt> file, adding ",ada" to EXTRALANGS, for example:
and unpack it:</p>
<pre class="doc_code">
-wget http://llvm.org/releases/2.5/llvm-2.5.tar.gz
-tar xzf llvm-2.5.tar.gz
-mv llvm-2.5 llvm
+wget http://llvm.org/releases/2.7/llvm-2.7.tgz
+tar xzf llvm-2.7.tgz
+mv llvm-2.7 llvm
</pre>
<p>or <a href="GettingStarted.html#checkout">check out the
@@ -133,9 +133,9 @@ mv llvm-2.5 llvm
and unpack it:</p>
<pre class="doc_code">
-wget http://llvm.org/releases/2.5/llvm-gcc-4.2-2.5.source.tar.gz
-tar xzf llvm-gcc-4.2-2.5.source.tar.gz
-mv llvm-gcc4.2-2.5.source llvm-gcc-4.2
+wget http://llvm.org/releases/2.7/llvm-gcc-4.2-2.7.source.tgz
+tar xzf llvm-gcc-4.2-2.7.source.tgz
+mv llvm-gcc-4.2-2.7.source llvm-gcc-4.2
</pre>
<p>or <a href="GettingStarted.html#checkout">check out the
@@ -272,7 +272,7 @@ More information is <a href="FAQ.html#license">available in the FAQ</a>.
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+ Last modified: $Date: 2010-08-31 21:40:21 +0200 (Tue, 31 Aug 2010) $
</address>
</body>
diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html
index aa874ae4751d..d840c9788ac1 100644
--- a/docs/GetElementPtr.html
+++ b/docs/GetElementPtr.html
@@ -26,7 +26,6 @@
<li><a href="#lead0">Why don't GEP x,0,0,1 and GEP x,1 alias? </a></li>
<li><a href="#trail0">Why do GEP x,1,0,0 and GEP x,1 alias? </a></li>
<li><a href="#vectors">Can GEP index into vector elements?</a>
- <li><a href="#unions">Can GEP index into unions?</a>
<li><a href="#addrspace">What effect do address spaces have on GEPs?</a>
<li><a href="#int">How is GEP different from ptrtoint, arithmetic, and inttoptr?</a></li>
<li><a href="#be">I'm writing a backend for a target which needs custom lowering for GEP. How do I do this?</a>
@@ -370,16 +369,6 @@ idx3 = (char*) &amp;MyVar + 8
<!-- *********************************************************************** -->
<div class="doc_subsection">
- <a name="unions"><b>Can GEP index into unions?</b></a>
-</div>
-<div class="doc_text">
- <p>Unknown.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<div class="doc_subsection">
<a name="addrspace"><b>What effect do address spaces have on GEPs?</b></a>
</div>
<div class="doc_text">
@@ -730,7 +719,7 @@ idx3 = (char*) &amp;MyVar + 8
<a href="http://validator.w3.org/check/referer"><img
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br/>
- Last modified: $Date: 2010-07-06 17:26:33 +0200 (Tue, 06 Jul 2010) $
+ Last modified: $Date: 2010-08-28 06:09:24 +0200 (Sat, 28 Aug 2010) $
</address>
</body>
</html>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index 66e099bad0a0..3f2e9fb2e640 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -79,7 +79,7 @@ placed.
the plugin <tt>.so</tt> file. To find out what link command <tt>gcc</tt>
would run in a given situation, run <tt>gcc -v <em>[...]</em></tt> and look
for the line where it runs <tt>collect2</tt>. Replace that with
- <tt>ld-new -plugin /path/to/libLLVMgold.so</tt> to test it out. Once you're
+ <tt>ld-new -plugin /path/to/LLVMgold.so</tt> to test it out. Once you're
ready to switch to using gold, backup your existing <tt>/usr/bin/ld</tt>
then replace it with <tt>ld-new</tt>.</p>
<p>You can produce bitcode files from <tt>llvm-gcc</tt> using
@@ -91,7 +91,7 @@ placed.
linker, which is why you need gold to be the installed system linker in your
path.</p>
<p>If you want <tt>ar</tt> and <tt>nm</tt> to work seamlessly as well, install
- <tt>libLLVMgold.so</tt> to <tt>/usr/lib/bfd-plugins</tt>. If you built your
+ <tt>LLVMgold.so</tt> to <tt>/usr/lib/bfd-plugins</tt>. If you built your
own gold, be sure to install the <tt>ar</tt> and <tt>nm-new</tt> you built to
<tt>/usr/bin</tt>.
<p>
@@ -157,9 +157,9 @@ $ llvm-gcc -use-gold-plugin a.a b.o -o main # &lt;-- link with LLVMgold plugin
bitcode, everything is in place for an easy to use LTO build of autotooled
projects:</p>
<ul>
- <li>Follow the instructions <a href="#build">on how to build libLLVMgold.so</a>.</li>
+ <li>Follow the instructions <a href="#build">on how to build LLVMgold.so</a>.</li>
<li>Install the newly built binutils to <tt>$PREFIX</tt></li>
- <li>Copy <tt>Release/lib/libLLVMgold.so</tt> to
+ <li>Copy <tt>Release/lib/LLVMgold.so</tt> to
<tt>$PREFIX/libexec/gcc/x86_64-unknown-linux-gnu/4.2.1/</tt> and
<tt>$PREFIX/lib/bfd-plugins/</tt></li>
<li>Set environment variables (<tt>$PREFIX</tt> is where you installed llvm-gcc and
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 4b4348d07f25..b717531e3479 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -25,6 +25,7 @@
<li><a href="#linkage_private">'<tt>private</tt>' Linkage</a></li>
<li><a href="#linkage_linker_private">'<tt>linker_private</tt>' Linkage</a></li>
<li><a href="#linkage_linker_private_weak">'<tt>linker_private_weak</tt>' Linkage</a></li>
+ <li><a href="#linkage_linker_private_weak_def_auto">'<tt>linker_private_weak_def_auto</tt>' Linkage</a></li>
<li><a href="#linkage_internal">'<tt>internal</tt>' Linkage</a></li>
<li><a href="#linkage_available_externally">'<tt>available_externally</tt>' Linkage</a></li>
<li><a href="#linkage_linkonce">'<tt>linkonce</tt>' Linkage</a></li>
@@ -73,7 +74,6 @@
<li><a href="#t_array">Array Type</a></li>
<li><a href="#t_struct">Structure Type</a></li>
<li><a href="#t_pstruct">Packed Structure Type</a></li>
- <li><a href="#t_union">Union Type</a></li>
<li><a href="#t_vector">Vector Type</a></li>
</ol>
</li>
@@ -491,20 +491,21 @@
the "hello world" module:</p>
<pre class="doc_code">
-<i>; Declare the string constant as a global constant.</i>
-<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a> <a href="#globalvars">constant</a> <a href="#t_array">[13 x i8]</a> c"hello world\0A\00" <i>; [13 x i8]*</i>
+<i>; Declare the string constant as a global constant.</i>&nbsp;
+<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a> <a href="#globalvars">constant</a> <a href="#t_array">[13 x i8]</a> c"hello world\0A\00" <i>; [13 x i8]*</i>&nbsp;
-<i>; External declaration of the puts function</i>
-<a href="#functionstructure">declare</a> i32 @puts(i8*) <i>; i32 (i8*)* </i>
+<i>; External declaration of the puts function</i>&nbsp;
+<a href="#functionstructure">declare</a> i32 @puts(i8*) <i>; i32 (i8*)* </i>&nbsp;
<i>; Definition of main function</i>
-define i32 @main() { <i>; i32()* </i>
- <i>; Convert [13 x i8]* to i8 *...</i>
- %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0 <i>; i8*</i>
+define i32 @main() { <i>; i32()* </i>&nbsp;
+ <i>; Convert [13 x i8]* to i8 *...</i>&nbsp;
+ %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0 <i>; i8*</i>&nbsp;
- <i>; Call puts function to write out the string to stdout.</i>
- <a href="#i_call">call</a> i32 @puts(i8* %cast210) <i>; i32</i>
- <a href="#i_ret">ret</a> i32 0<br>}
+ <i>; Call puts function to write out the string to stdout.</i>&nbsp;
+ <a href="#i_call">call</a> i32 @puts(i8* %cast210) <i>; i32</i>&nbsp;
+ <a href="#i_ret">ret</a> i32 0&nbsp;
+}
<i>; Named metadata</i>
!1 = metadata !{i32 41}
@@ -556,6 +557,15 @@ define i32 @main() { <i>; i32()* </i>
linker. The symbols are removed by the linker from the final linked image
(executable or dynamic library).</dd>
+ <dt><tt><b><a name="linkage_linker_private_weak_def_auto">linker_private_weak_def_auto</a></b></tt></dt>
+ <dd>Similar to "<tt>linker_private_weak</tt>", but it's known that the address
+ of the object is not taken. For instance, functions that had an inline
+ definition, but the compiler decided not to inline it. Note,
+ unlike <tt>linker_private</tt> and <tt>linker_private_weak</tt>,
+ <tt>linker_private_weak_def_auto</tt> may have only <tt>default</tt>
+ visibility. The symbols are removed by the linker from the final linked
+ image (executable or dynamic library).</dd>
+
<dt><tt><b><a name="linkage_internal">internal</a></b></tt></dt>
<dd>Similar to private, but the value shows as a local symbol
(<tt>STB_LOCAL</tt> in the case of ELF) in the object file. This
@@ -788,7 +798,7 @@ define i32 @main() { <i>; i32()* </i>
</pre>
<p>You may give a name to any <a href="#typesystem">type</a> except
- "<a href="t_void">void</a>". Type name aliases may be used anywhere a type
+ "<a href="#t_void">void</a>". Type name aliases may be used anywhere a type
is expected with the syntax "%mytype".</p>
<p>Note that type names are aliases for the structural type that they indicate,
@@ -949,15 +959,17 @@ define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
<div class="doc_text">
<p>Named metadata is a collection of metadata. <a href="#metadata">Metadata
- nodes</a> (but not metadata strings) and null are the only valid operands for
+ nodes</a> (but not metadata strings) are the only valid operands for
a named metadata.</p>
<h5>Syntax:</h5>
<pre class="doc_code">
-; An unnamed metadata node, which is referenced by the named metadata.
+; Some unnamed metadata nodes, which are referenced by the named metadata.
+!0 = metadata !{metadata !"zero"}
!1 = metadata !{metadata !"one"}
+!2 = metadata !{metadata !"two"}
; A named metadata.
-!name = !{null, !1}
+!name = !{!0, !1, !2}
</pre>
</div>
@@ -1462,7 +1474,6 @@ Classifications</a> </div>
<a href="#t_pointer">pointer</a>,
<a href="#t_vector">vector</a>,
<a href="#t_struct">structure</a>,
- <a href="#t_union">union</a>,
<a href="#t_array">array</a>,
<a href="#t_label">label</a>,
<a href="#t_metadata">metadata</a>.
@@ -1482,7 +1493,6 @@ Classifications</a> </div>
<a href="#t_pointer">pointer</a>,
<a href="#t_struct">structure</a>,
<a href="#t_pstruct">packed structure</a>,
- <a href="#t_union">union</a>,
<a href="#t_vector">vector</a>,
<a href="#t_opaque">opaque</a>.
</td>
@@ -1630,8 +1640,8 @@ Classifications</a> </div>
<p>Aggregate Types are a subset of derived types that can contain multiple
member types. <a href="#t_array">Arrays</a>,
- <a href="#t_struct">structs</a>, <a href="#t_vector">vectors</a> and
- <a href="#t_union">unions</a> are aggregate types.</p>
+ <a href="#t_struct">structs</a>, and <a href="#t_vector">vectors</a> are
+ aggregate types.</p>
</div>
@@ -1701,9 +1711,7 @@ Classifications</a> </div>
<h5>Overview:</h5>
<p>The function type can be thought of as a function signature. It consists of
a return type and a list of formal parameter types. The return type of a
- function type is a scalar type, a void type, a struct type, or a union
- type. If the return type is a struct type then all struct elements must be
- of first class types, and the struct must have at least one element.</p>
+ function type is a first class type or a void type.</p>
<h5>Syntax:</h5>
<pre>
@@ -1825,53 +1833,6 @@ Classifications</a> </div>
</div>
<!-- _______________________________________________________________________ -->
-<div class="doc_subsubsection"> <a name="t_union">Union Type</a> </div>
-
-<div class="doc_text">
-
-<h5>Overview:</h5>
-<p>A union type describes an object with size and alignment suitable for
- an object of any one of a given set of types (also known as an "untagged"
- union). It is similar in concept and usage to a
- <a href="#t_struct">struct</a>, except that all members of the union
- have an offset of zero. The elements of a union may be any type that has a
- size. Unions must have at least one member - empty unions are not allowed.
- </p>
-
-<p>The size of the union as a whole will be the size of its largest member,
- and the alignment requirements of the union as a whole will be the largest
- alignment requirement of any member.</p>
-
-<p>Union members are accessed using '<tt><a href="#i_load">load</a></tt> and
- '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field with
- the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.
- Since all members are at offset zero, the getelementptr instruction does
- not affect the address, only the type of the resulting pointer.</p>
-
-<h5>Syntax:</h5>
-<pre>
- union { &lt;type list&gt; }
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>union { i32, i32*, float }</tt></td>
- <td class="left">A union of three types: an <tt>i32</tt>, a pointer to
- an <tt>i32</tt>, and a <tt>float</tt>.</td>
- </tr><tr class="layout">
- <td class="left">
- <tt>union {&nbsp;float,&nbsp;i32&nbsp;(i32)&nbsp;*&nbsp;}</tt></td>
- <td class="left">A union, where the first element is a <tt>float</tt> and the
- second element is a <a href="#t_pointer">pointer</a> to a
- <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
- an <tt>i32</tt>.</td>
- </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
<div class="doc_subsubsection"> <a name="t_pointer">Pointer Type</a> </div>
<div class="doc_text">
@@ -2112,14 +2073,6 @@ Classifications</a> </div>
the number and types of elements must match those specified by the
type.</dd>
- <dt><b>Union constants</b></dt>
- <dd>Union constants are represented with notation similar to a structure with
- a single element - that is, a single typed element surrounded
- by braces (<tt>{}</tt>)). For example: "<tt>{ i32 4 }</tt>". The
- <a href="#t_union">union type</a> can be initialized with a single-element
- struct as long as the type of the struct element matches the type of
- one of the union members.</dd>
-
<dt><b>Array constants</b></dt>
<dd>Array constants are represented with notation similar to array type
definitions (a comma separated list of elements, surrounded by square
@@ -4140,7 +4093,7 @@ Instruction</a> </div>
<h5>Arguments:</h5>
<p>The first operand of an '<tt>extractvalue</tt>' instruction is a value
- of <a href="#t_struct">struct</a>, <a href="#t_union">union</a> or
+ of <a href="#t_struct">struct</a> or
<a href="#t_array">array</a> type. The operands are constant indices to
specify which value to extract in a similar manner as indices in a
'<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
@@ -4174,7 +4127,7 @@ Instruction</a> </div>
<h5>Arguments:</h5>
<p>The first operand of an '<tt>insertvalue</tt>' instruction is a value
- of <a href="#t_struct">struct</a>, <a href="#t_union">union</a> or
+ of <a href="#t_struct">struct</a> or
<a href="#t_array">array</a> type. The second operand is a first-class
value to insert. The following operands are constant indices indicating
the position at which to insert the value in a similar manner as indices in a
@@ -4407,12 +4360,12 @@ Instruction</a> </div>
indexes a value of the type pointed to (not necessarily the value directly
pointed to, since the first index can be non-zero), etc. The first type
indexed into must be a pointer value, subsequent types can be arrays,
- vectors, structs and unions. Note that subsequent types being indexed into
+ vectors, and structs. Note that subsequent types being indexed into
can never be pointers, since that would require loading the pointer before
continuing calculation.</p>
<p>The type of each index argument depends on the type it is indexing into.
- When indexing into a (optionally packed) structure or union, only <tt>i32</tt>
+ When indexing into a (optionally packed) structure, only <tt>i32</tt>
integer <b>constants</b> are allowed. When indexing into an array, pointer
or vector, integers of any width are allowed, and they are not required to be
constant.</p>
@@ -6117,8 +6070,8 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use llvm.memset on any integer bit
- width and for different address spaces. Not all targets support all bit
- widths however.</p>
+ width and for different address spaces. However, not all targets support all
+ bit widths.</p>
<pre>
declare void @llvm.memset.p0i8.i32(i8* &lt;dest&gt;, i8 &lt;val&gt;,
@@ -6132,14 +6085,14 @@ LLVM</a>.</p>
particular byte value.</p>
<p>Note that, unlike the standard libc function, the <tt>llvm.memset</tt>
- intrinsic does not return a value, takes extra alignment/volatile arguments,
- and the destination can be in an arbitrary address space.</p>
+ intrinsic does not return a value and takes extra alignment/volatile
+ arguments. Also, the destination can be in an arbitrary address space.</p>
<h5>Arguments:</h5>
<p>The first argument is a pointer to the destination to fill, the second is the
- byte value to fill it with, the third argument is an integer argument
+ byte value with which to fill it, the third argument is an integer argument
specifying the number of bytes to fill, and the fourth argument is the known
- alignment of destination location.</p>
+ alignment of the destination location.</p>
<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
then the caller guarantees that the destination pointer is aligned to that
@@ -7746,7 +7699,7 @@ LLVM</a>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-07-13 21:48:13 +0200 (Tue, 13 Jul 2010) $
+ Last modified: $Date: 2010-08-28 06:09:24 +0200 (Sat, 28 Aug 2010) $
</address>
</body>
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
index dd9047866005..38b7ae19fa64 100644
--- a/docs/MakefileGuide.html
+++ b/docs/MakefileGuide.html
@@ -785,6 +785,9 @@
not.</dd>
<dt><a name="PROJ_SRC_DIR"><tt>PROJ_SRC_DIR</tt></a></dt>
<dd>The directory which contains the source files to be built.</dd>
+ <dt><a name="BUILD_EXAMPLES"><tt>BUILD_EXAMPLES</tt></a></dt>
+ <dd>If set to 1, build examples in <tt>examples</tt> and (if building
+ Clang) <tt>tools/clang/examples</tt> directories.</dd>
<dt><a name="BZIP2"><tt>BZIP2</tt></a><small>(configured)</small></dt>
<dd>The path to the <tt>bzip2</tt> tool.</dd>
<dt><a name="CC"><tt>CC</tt></a><small>(configured)</small></dt>
@@ -1025,7 +1028,7 @@
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-07-07 09:48:00 +0200 (Wed, 07 Jul 2010) $
+ Last modified: $Date: 2010-07-24 19:54:00 +0200 (Sat, 24 Jul 2010) $
</address>
</body>
</html>
diff --git a/docs/Passes.html b/docs/Passes.html
index 70d909796e68..0358745f79f7 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -120,6 +120,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#print-used-types">-print-used-types</a></td><td>Find Used Types</td></tr>
<tr><td><a href="#profile-estimator">-profile-estimator</a></td><td>Estimate profiling information</td></tr>
<tr><td><a href="#profile-loader">-profile-loader</a></td><td>Load profile information from llvmprof.out</td></tr>
+<tr><td><a href="#regions">-regions</a></td><td>Detect single entry single exit regions in a function</td></tr>
<tr><td><a href="#profile-verifier">-profile-verifier</a></td><td>Verify profiling information</td></tr>
<tr><td><a href="#scalar-evolution">-scalar-evolution</a></td><td>Scalar Evolution Analysis</td></tr>
<tr><td><a href="#scev-aa">-scev-aa</a></td><td>ScalarEvolution-based Alias Analysis</td></tr>
@@ -166,6 +167,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#loop-unroll">-loop-unroll</a></td><td>Unroll loops</td></tr>
<tr><td><a href="#loop-unswitch">-loop-unswitch</a></td><td>Unswitch loops</td></tr>
<tr><td><a href="#loopsimplify">-loopsimplify</a></td><td>Canonicalize natural loops</td></tr>
+<tr><td><a href="#loweratomic">-loweratomic</a></td><td>Lower atomic intrinsics</td></tr>
<tr><td><a href="#lowerinvoke">-lowerinvoke</a></td><td>Lower invoke and unwind, for unwindless code generators</td></tr>
<tr><td><a href="#lowersetjmp">-lowersetjmp</a></td><td>Lower Set Jump</td></tr>
<tr><td><a href="#lowerswitch">-lowerswitch</a></td><td>Lower SwitchInst's to branches</td></tr>
@@ -647,7 +649,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<div class="doc_text">
<p>
This pass, only available in <code>opt</code>, prints the call graph to
- standard output in a human-readable form.
+ standard error in a human-readable form.
</p>
</div>
@@ -658,7 +660,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<div class="doc_text">
<p>
This pass, only available in <code>opt</code>, prints the SCCs of the call
- graph to standard output in a human-readable form.
+ graph to standard error in a human-readable form.
</p>
</div>
@@ -669,7 +671,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<div class="doc_text">
<p>
This pass, only available in <code>opt</code>, prints the SCCs of each
- function CFG to standard output in a human-readable form.
+ function CFG to standard error in a human-readable form.
</p>
</div>
@@ -678,15 +680,13 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<a name="print-dbginfo">-print-dbginfo: Print debug info in human readable form</a>
</div>
<div class="doc_text">
- <p>Pass that prints instructions, and associated debug info:
+ <p>Pass that prints instructions, and associated debug info:</p>
<ul>
<li>source/line/col information</li>
<li>original variable name</li>
<li>original type name</li>
</ul>
-
- </p>
</div>
<!-------------------------------------------------------------------------- -->
@@ -771,6 +771,17 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<div class="doc_text">
<p>Pass that checks profiling information for plausibility.</p>
</div>
+<div class="doc_subsection">
+ <a name="regions">-regions: Detect single entry single exit regions in a function</a>
+</div>
+<div class="doc_text">
+ <p>
+ The <code>RegionInfo</code> pass detects single entry single exit regions in a
+ function, where a region is defined as any subgraph that is connected to the
+ remaining graph at only two spots. Furthermore, an hierarchical region tree is
+ built.
+ </p>
+</div>
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
@@ -1537,6 +1548,24 @@ if (X &lt; 3) {</pre>
<!-------------------------------------------------------------------------- -->
<div class="doc_subsection">
+ <a name="loweratomic">-loweratomic: Lower atomic intrinsics</a>
+</div>
+<div class="doc_text">
+ <p>
+ This pass lowers atomic intrinsics to non-atomic form for use in a known
+ non-preemptible environment.
+ </p>
+
+ <p>
+ The pass does not verify that the environment is non-preemptible (in
+ general this would require knowledge of the entire call graph of the
+ program including any libraries which may not be available in bitcode form);
+ it simply lowers every atomic intrinsic.
+ </p>
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<div class="doc_subsection">
<a name="lowerinvoke">-lowerinvoke: Lower invoke and unwind, for unwindless code generators</a>
</div>
<div class="doc_text">
@@ -1929,12 +1958,13 @@ if (X &lt; 3) {</pre>
<a name="strip-debug-declare">-strip-debug-declare: Strip all llvm.dbg.declare intrinsics</a>
</div>
<div class="doc_text">
- <p>This pass implements code stripping. Specifically, it can delete:
+ <p>This pass implements code stripping. Specifically, it can delete:</p>
<ul>
<li>names for virtual registers</li>
<li>symbols for internal globals and functions</li>
<li>debug information</li>
</ul>
+ <p>
Note that this transformation makes code much less readable, so it should
only be used in situations where the 'strip' utility would be used, such as
reducing code size or making it harder to reverse engineer code.
@@ -1946,12 +1976,13 @@ if (X &lt; 3) {</pre>
<a name="strip-nondebug">-strip-nondebug: Strip all symbols, except dbg symbols, from a module</a>
</div>
<div class="doc_text">
- <p>This pass implements code stripping. Specifically, it can delete:
+ <p>This pass implements code stripping. Specifically, it can delete:</p>
<ul>
<li>names for virtual registers</li>
<li>symbols for internal globals and functions</li>
<li>debug information</li>
</ul>
+ <p>
Note that this transformation makes code much less readable, so it should
only be used in situations where the 'strip' utility would be used, such as
reducing code size or making it harder to reverse engineer code.
@@ -2211,7 +2242,7 @@ if (X &lt; 3) {</pre>
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-07-06 17:52:15 +0200 (Tue, 06 Jul 2010) $
+ Last modified: $Date: 2010-08-20 03:03:44 +0200 (Fri, 20 Aug 2010) $
</address>
</body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index 9992cd91b652..8fdd8a00b9bc 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -309,8 +309,6 @@ to write maintainable code more than where to put your curly braces.</p>
<div class="doc_text">
<ol>
-<li><a href="http://www.psc.edu/%7Esemke/cvs_branches.html">CVS
-Branch and Tag Primer</a></li>
<li><a href="http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html">Using
static and shared libraries across platforms</a></li>
</ol>
@@ -1436,7 +1434,7 @@ to the key string for a value.</p>
<p>The StringMap is very fast for several reasons: quadratic probing is very
cache efficient for lookups, the hash value of strings in buckets is not
-recomputed when lookup up an element, StringMap rarely has to touch the
+recomputed when looking up an element, StringMap rarely has to touch the
memory for unrelated objects when looking up a value (even when hash collisions
happen), hash table growth does not recompute the hash values for strings
already in the table, and each pair in the map is store in a single allocation
@@ -3942,7 +3940,7 @@ arguments. An argument has a pointer to the parent Function.</p>
<a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-07-15 00:38:02 +0200 (Thu, 15 Jul 2010) $
+ Last modified: $Date: 2010-08-04 17:59:16 +0200 (Wed, 04 Aug 2010) $
</address>
</body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index a5a35dd84e67..d346e1ccb1f3 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -67,9 +67,7 @@ Almost dead code.
include/llvm/Analysis/LiveValues.h => Dan
lib/Transforms/IPO/MergeFunctions.cpp => consider for 2.8.
llvm/Analysis/PointerTracking.h => Edwin wants this, consider for 2.8.
- ABCD, GEPSplitterPass
- MSIL backend?
- lib/Transforms/Utils/SSI.cpp -> ABCD depends on it.
+ GEPSplitterPass
-->
@@ -78,6 +76,7 @@ Almost dead code.
strong phi elim
llvm.dbg.value: variable debug info for optimized code
loop dependence analysis
+ TBAA
-->
<!-- for announcement email:
@@ -118,40 +117,9 @@ modular, library-based architecture that makes it suitable for creating or
integrating with other development tools. Clang is considered a
production-quality compiler for C and Objective-C on x86 (32- and 64-bit).</p>
-<p>In the LLVM 2.7 time-frame, the Clang team has made many improvements:</p>
+<p>In the LLVM 2.8 time-frame, the Clang team has made many improvements:</p>
<ul>
-
-<li>C++ Support: Clang is now capable of self-hosting! While still
-alpha-quality, Clang's C++ support has matured enough to build LLVM and Clang,
-and C++ is now enabled by default. See the <a
-href="http://clang.llvm.org/cxx_compatibility.html">Clang C++ compatibility
-page</a> for common C++ migration issues.</li>
-
-<li>Objective-C: Clang now includes experimental support for an updated
-Objective-C ABI on non-Darwin platforms. This includes support for non-fragile
-instance variables and accelerated proxies, as well as greater potential for
-future optimisations. The new ABI is used when compiling with the
--fobjc-nonfragile-abi and -fgnu-runtime options. Code compiled with these
-options may be mixed with code compiled with GCC or clang using the old GNU ABI,
-but requires the libobjc2 runtime from the GNUstep project.</li>
-
-<li>New warnings: Clang contains a number of new warnings, including
-control-flow warnings (unreachable code, missing return statements in a
-non-<code>void</code> function, etc.), sign-comparison warnings, and improved
-format-string warnings.</li>
-
-<li>CIndex API and Python bindings: Clang now includes a C API as part of the
-CIndex library. Although we may make some changes to the API in the future, it
-is intended to be stable and has been designed for use by external projects. See
-the Clang
-doxygen <a href="http://clang.llvm.org/doxygen/group__CINDEX.html">CIndex</a>
-documentation for more details. The CIndex API also includes a preliminary
-set of Python bindings.</li>
-
-<li>ARM Support: Clang now has ABI support for both the Darwin and Linux ARM
-ABIs. Coupled with many improvements to the LLVM ARM backend, Clang is now
-suitable for use as a beta quality ARM compiler.</li>
</ul>
</div>
@@ -170,10 +138,7 @@ suitable for use as a beta quality ARM compiler.</li>
future</a>!). The tool is very good at finding bugs that occur on specific
paths through code, such as on error conditions.</p>
-<p>In the LLVM 2.7 time-frame, the analyzer core has made several major and
- minor improvements, including better support for tracking the fields of
- structures, initial support (not enabled by default yet) for doing
- interprocedural (cross-function) analysis, and new checks have been added.
+<p>In the LLVM 2.8 time-frame,
</p>
</div>
@@ -190,26 +155,8 @@ a JVM and a CLI Virtual Machine (Microsoft .NET is an
implementation of the CLI) using LLVM for static and just-in-time
compilation.</p>
-<p>
-With the release of LLVM 2.7, VMKit has shifted to a great framework for writing
-virtual machines. VMKit now offers precise and efficient garbage collection with
-multi-threading support, thanks to the MMTk memory management toolkit, as well
-as just in time and ahead of time compilation with LLVM. The major changes in
-VMKit 0.27 are:</p>
+<p>With the release of LLVM 2.8, ...</p>
-<ul>
-
-<li>Garbage collection: VMKit now uses the MMTk toolkit for garbage collectors.
- The first collector to be ported is the MarkSweep collector, which is precise,
- and drastically improves the performance of VMKit.</li>
-<li>Line number information in the JVM: by using the debug metadata of LLVM, the
- JVM now supports precise line number information, useful when printing a stack
- trace.</li>
-<li>Interface calls in the JVM: we implemented a variant of the Interface Method
- Table technique for interface calls in the JVM.
-</li>
-
-</ul>
</div>
@@ -231,8 +178,10 @@ libgcc routines).</p>
<p>
All of the code in the compiler-rt project is available under the standard LLVM
-License, a "BSD-style" license. New in LLVM 2.7: compiler_rt now
-supports ARM targets.</p>
+License, a "BSD-style" license. New in LLVM 2.8:
+
+Soft float support
+</p>
</div>
@@ -265,7 +214,7 @@ supported, and only on linux and darwin (darwin needs an additional gcc patch).
</p>
<p>
-DragonEgg is a new project which is seeing its first release with llvm-2.7.
+2.8 status here.
</p>
</div>
@@ -288,23 +237,13 @@ href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro to the
LLVM MC Project Blog Post</a>.
</p>
-<p>2.7 includes major parts of the work required by the new MC Project. A few
- targets have been refactored to support it, and work is underway to support a
- native assembler in LLVM. This work is not complete in LLVM 2.7, but it has
- made substantially more progress on LLVM mainline.</p>
-
-<p>One minor example of what MC can do is to transcode an AT&amp;T syntax
- X86 .s file into intel syntax. You can do this with something like:</p>
-<pre>
- llvm-mc foo.s -output-asm-variant=1 -o foo-intel.s
-</pre>
-
+<p>2.8 status here</p>
</div>
<!-- *********************************************************************** -->
<div class="doc_section">
- <a name="externalproj">External Open Source Projects Using LLVM 2.7</a>
+ <a name="externalproj">External Open Source Projects Using LLVM 2.8</a>
</div>
<!-- *********************************************************************** -->
@@ -312,171 +251,13 @@ LLVM MC Project Blog Post</a>.
<p>An exciting aspect of LLVM is that it is used as an enabling technology for
a lot of other language and tools projects. This section lists some of the
- projects that have already been updated to work with LLVM 2.7.</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="pure">Pure</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://pure-lang.googlecode.com/">Pure</a>
-is an algebraic/functional programming language based on term rewriting.
-Programs are collections of equations which are used to evaluate expressions in
-a symbolic fashion. Pure offers dynamic typing, eager and lazy evaluation,
-lexical closures, a hygienic macro system (also based on term rewriting),
-built-in list and matrix support (including list and matrix comprehensions) and
-an easy-to-use C interface. The interpreter uses LLVM as a backend to
- JIT-compile Pure programs to fast native code.</p>
-
-<p>Pure versions 0.43 and later have been tested and are known to work with
-LLVM 2.7 (and continue to work with older LLVM releases >= 2.5).</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="RoadsendPHP">Roadsend PHP</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://code.roadsend.com/rphp">Roadsend PHP</a> (rphp) is an open
-source implementation of the PHP programming
-language that uses LLVM for its optimizer, JIT and static compiler. This is a
-reimplementation of an earlier project that is now based on LLVM.
-</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="UnladenSwallow">Unladen Swallow</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://code.google.com/p/unladen-swallow/">Unladen Swallow</a> is a
-branch of <a href="http://python.org/">Python</a> intended to be fully
-compatible and significantly faster. It uses LLVM's optimization passes and JIT
-compiler.
-</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="tce">TTA-based Codesign Environment (TCE)</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://tce.cs.tut.fi/">TCE</a> is a toolset for designing
-application-specific processors (ASP) based on the Transport triggered
-architecture (TTA). The toolset provides a complete co-design flow from C/C++
-programs down to synthesizable VHDL and parallel program binaries. Processor
-customization points include the register files, function units, supported
-operations, and the interconnection network.</p>
-
-<p>TCE uses llvm-gcc/Clang and LLVM for C/C++ language support, target
-independent optimizations and also for parts of code generation. It generates
-new LLVM-based code generators "on the fly" for the designed TTA processors and
-loads them in to the compiler backend as runtime libraries to avoid per-target
-recompilation of larger parts of the compiler chain.</p>
-
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="safecode">SAFECode Compiler</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://safecode.cs.illinois.edu">SAFECode</a> is a memory safe C
-compiler built using LLVM. It takes standard, unannotated C code, analyzes the
-code to ensure that memory accesses and array indexing operations are safe, and
-instruments the code with run-time checks when safety cannot be proven
-statically.
-</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="icedtea">IcedTea Java Virtual Machine Implementation</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://icedtea.classpath.org/wiki/Main_Page">IcedTea</a> provides a
-harness to build OpenJDK using only free software build tools and to provide
-replacements for the not-yet free parts of OpenJDK. One of the extensions that
-IcedTea provides is a new JIT compiler named <a
-href="http://icedtea.classpath.org/wiki/ZeroSharkFaq">Shark</a> which uses LLVM
-to provide native code generation without introducing processor-dependent
-code.
-</p>
-<p>Icedtea6 1.8 and later have been tested and are known to work with
-LLVM 2.7 (and continue to work with older LLVM releases >= 2.6 as well).
-</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="llvm-lua">LLVM-Lua</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://code.google.com/p/llvm-lua/">LLVM-Lua</a> uses LLVM
- to add JIT and static compiling support to the Lua VM. Lua
-bytecode is analyzed to remove type checks, then LLVM is used to compile the
-bytecode down to machine code.
-</p>
-<p>LLVM-Lua 1.2.0 have been tested and is known to work with LLVM 2.7.
-</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="MacRuby">MacRuby</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://macruby.org">MacRuby</a> is an implementation of Ruby based on
-core Mac OS technologies, sponsored by Apple Inc. It uses LLVM at runtime for
-optimization passes, JIT compilation and exception handling. It also allows
-static (ahead-of-time) compilation of Ruby code straight to machine code.
-</p>
-<p>The upcoming MacRuby 0.6 release works with LLVM 2.7.
-</p>
-</div>
-
-<!--=========================================================================-->
-<div class="doc_subsection">
-<a name="GHC">Glasgow Haskell Compiler (GHC)</a>
-</div>
-
-<div class="doc_text">
-<p>
-<a href="http://www.haskell.org/ghc/">GHC</a> is an open source,
-state-of-the-art programming suite for Haskell, a standard lazy
-functional programming language. It includes an optimizing static
-compiler generating good code for a variety of platforms, together
-with an interactive system for convenient, quick development.</p>
-
-<p>In addition to the existing C and native code generators, GHC now
-supports an <a
-href="http://hackage.haskell.org/trac/ghc/wiki/Commentary/Compiler/Backends/LLVM">LLVM
-code generator</a>. GHC supports LLVM 2.7.</p>
-
+ projects that have already been updated to work with LLVM 2.8.</p>
</div>
<!-- *********************************************************************** -->
<div class="doc_section">
- <a name="whatsnew">What's New in LLVM 2.7?</a>
+ <a name="whatsnew">What's New in LLVM 2.8?</a>
</div>
<!-- *********************************************************************** -->
@@ -496,29 +277,11 @@ in this section.
<div class="doc_text">
-<p>In addition to changes to the code, between LLVM 2.6 and 2.7, a number of
+<p>In addition to changes to the code, between LLVM 2.7 and 2.8, a number of
organization changes have happened:
</p>
<ul>
-<li>LLVM has a new <a href="http://llvm.org/Logo.html">official logo</a>!</li>
-
-<li>Ted Kremenek and Doug Gregor have stepped forward as <a
- href="http://llvm.org/docs/DeveloperPolicy.html#owners">Code Owners</a> of the
- Clang static analyzer and the Clang frontend, respectively.</li>
-
-<li>LLVM now has an <a href="http://blog.llvm.org">official Blog</a> at
- <a href="http://blog.llvm.org">http://blog.llvm.org</a>. This is a great way
- to learn about new LLVM-related features as they are implemented. Several
- features in this release are already explained on the blog.</li>
-
-<li>The LLVM web pages are now checked into the SVN server, in the "www",
- "www-pubs" and "www-releases" SVN modules. Previously they were hidden in a
- largely inaccessible old CVS server.</li>
-
-<li><a href="http://llvm.org">llvm.org</a> is now hosted on a new (and much
- faster) server. It is still graciously hosted at the University of Illinois
- of Urbana Champaign.</li>
</ul>
</div>
@@ -529,43 +292,10 @@ organization changes have happened:
<div class="doc_text">
-<p>LLVM 2.7 includes several major new capabilities:</p>
+<p>LLVM 2.8 includes several major new capabilities:</p>
<ul>
-<li>2.7 includes initial support for the <a
- href="http://en.wikipedia.org/wiki/MicroBlaze">MicroBlaze</a> target.
- MicroBlaze is a soft processor core designed for Xilinx FPGAs.</li>
-
-<li>2.7 includes a new LLVM IR "extensible metadata" feature. This feature
- supports many different use cases, including allowing front-end authors to
- encode source level information into LLVM IR, which is consumed by later
- language-specific passes. This is a great way to do high-level optimizations
- like devirtualization, type-based alias analysis, etc. See the <a
- href="http://blog.llvm.org/2010/04/extensible-metadata-in-llvm-ir.html">
- Extensible Metadata Blog Post</a> for more information.</li>
-
-<li>2.7 encodes <a href="SourceLevelDebugging.html">debug information</a>
-in a completely new way, built on extensible metadata. The new implementation
-is much more memory efficient and paves the way for improvements to optimized
-code debugging experience.</li>
-
-<li>2.7 now directly supports taking the address of a label and doing an
- indirect branch through a pointer. This is particularly useful for
- interpreter loops, and is used to implement the GCC "address of label"
- extension. For more information, see the <a
-href="http://blog.llvm.org/2010/01/address-of-label-and-indirect-branches.html">
-Address of Label and Indirect Branches in LLVM IR Blog Post</a>.
-
-<li>2.7 is the first release to start supporting APIs for assembling and
- disassembling target machine code. These APIs are useful for a variety of
- low level clients, and are surfaced in the new "enhanced disassembly" API.
- For more information see the <a
- href="http://blog.llvm.org/2010/01/x86-disassembler.html">The X86
- Disassembler Blog Post</a> for more information.</li>
-
-<li>2.7 includes major parts of the work required by the new MC Project,
- see the <a href="#mc">MC update above</a> for more information.</li>
-
+<li>.</li>
</ul>
</div>
@@ -580,30 +310,56 @@ Address of Label and Indirect Branches in LLVM IR Blog Post</a>.
expose new optimization opportunities:</p>
<ul>
-<li>LLVM IR now supports a 16-bit "half float" data type through <a
- href="LangRef.html#int_fp16">two new intrinsics</a> and APFloat support.</li>
-<li>LLVM IR supports two new <a href="LangRef.html#fnattrs">function
- attributes</a>: inlinehint and alignstack(n). The former is a hint to the
- optimizer that a function was declared 'inline' and thus the inliner should
- weight it higher when considering inlining it. The later
- indicates to the code generator that the function diverges from the platform
- ABI on stack alignment.</li>
-<li>The new <a href="LangRef.html#int_objectsize">llvm.objectsize</a> intrinsic
- allows the optimizer to infer the sizes of memory objects in some cases.
- This intrinsic is used to implement the GCC <tt>__builtin_object_size</tt>
- extension.</li>
-<li>LLVM IR now supports marking load and store instructions with <a
- href="LangRef.html#i_load">"non-temporal" hints</a> (building on the new
- metadata feature). This hint encourages the code
- generator to generate non-temporal accesses when possible, which are useful
- for code that is carefully managing cache behavior. Currently, only the
- X86 backend provides target support for this feature.</li>
-
-<li>LLVM 2.7 has pre-alpha support for <a
- href="LangRef.html#t_union">unions in LLVM IR</a>.
- Unfortunately, this support is not really usable in 2.7, so if you're
- interested in pushing it forward, please help contribute to LLVM mainline.</li>
+<li>LLVM 2.8 changes the internal order of operands in <a
+ href="http://llvm.org/doxygen/classllvm_1_1InvokeInst.html"><tt>InvokeInst</tt></a>
+ and <a href="http://llvm.org/doxygen/classllvm_1_1CallInst.html"><tt>CallInst</tt></a>.
+ To be portable across releases, resort to <tt>CallSite</tt> and the
+ high-level accessors, such as <tt>getCalledValue</tt> and <tt>setUnwindDest</tt>.
+</li>
+<li>
+ You can no longer pass use_iterators directly to cast<> (and similar), because
+ these routines tend to perform costly dereference operations more than once. You
+ have to dereference the iterators yourself and pass them in.
+</li>
+<li>
+ llvm.memcpy.*, llvm.memset.*, llvm.memmove.* (and possibly other?) intrinsics
+ take an extra parameter now (i1 isVolatile), totaling 5 parameters.
+ If you were creating these intrinsic calls and prototypes yourself (as opposed
+ to using Intrinsic::getDeclaration), you can use UpgradeIntrinsicFunction/UpgradeIntrinsicCall
+ to be portable accross releases.
+ Note that you cannot use Intrinsic::getDeclaration() in a backwards compatible
+ way (needs 2/3 types now, in 2.7 it needed just 1).
+</li>
+<li>
+ SetCurrentDebugLocation takes a DebugLoc now instead of a MDNode.
+ Change your code to use
+ SetCurrentDebugLocation(DebugLoc::getFromDILocation(...)).
+</li>
+<li>
+ VISIBILITY_HIDDEN is gone.
+</li>
+<li>
+ The <tt>RegisterPass</tt> and <tt>RegisterAnalysisGroup</tt> templates are
+ considered deprecated, but continue to function in LLVM 2.8. Clients are
+ strongly advised to use the upcoming <tt>INITIALIZE_PASS()</tt> and
+ <tt>INITIALIZE_AG_PASS()</tt> macros instead.
+<li>
+ SMDiagnostic takes different parameters now. //FIXME: how to upgrade?
+</li>
+<li>
+ The constructor for the Triple class no longer tries to understand odd triple
+ specifications. Frontends should ensure that they only pass valid triples to
+ LLVM. The Triple::normalize utility method has been added to help front-ends
+ deal with funky triples.
+<li>
+ Some APIs got renamed:
+ <ul>
+ <li>llvm_report_error -&gt; report_fatal_error</li>
+ <li>llvm_install_error_handler -&gt; install_fatal_error_handler</li>
+ <li>llvm::DwarfExceptionHandling -&gt; llvm::JITExceptionHandling</li>
+ </ul>
+</li>
</ul>
</div>
@@ -620,48 +376,7 @@ release includes a few major enhancements and additions to the optimizers:</p>
<ul>
-<li>The inliner now merges arrays stack objects in different callees when
- inlining multiple call sites into one function. This reduces the stack size
- of the resultant function.</li>
-<li>The -basicaa alias analysis pass (which is the default) has been improved to
- be less dependent on "type safe" pointers. It can now look through bitcasts
- and other constructs more aggressively, allowing better load/store
- optimization.</li>
-<li>The load elimination optimization in the GVN Pass [<a
-href="http://blog.llvm.org/2009/12/introduction-to-load-elimination-in-gvn.html">intro
- blog post</a>] has been substantially improved to be more aggressive about
- partial redundancy elimination and do more aggressive phi translation. Please
- see the <a
- href="http://blog.llvm.org/2009/12/advanced-topics-in-redundant-load.html">
- Advanced Topics in Redundant Load Elimination with a Focus on PHI Translation
- Blog Post</a> for more details.</li>
-<li>The module <a href="LangRef.html#datalayout">target data string</a> now
- includes a notion of 'native' integer data types for the target. This
- helps mid-level optimizations avoid promoting complex sequences of
- operations to data types that are not natively supported (e.g. converting
- i32 operations to i64 on 32-bit chips).</li>
-<li>The mid-level optimizer is now conservative when operating on a module with
- no target data. Previously, it would default to SparcV9 settings, which is
- not what most people expected.</li>
-<li>Jump threading is now much more aggressive at simplifying correlated
- conditionals and threading blocks with otherwise complex logic. It has
- subsumed the old "Conditional Propagation" pass, and -condprop has been
- removed from LLVM 2.7.</li>
-<li>The -instcombine pass has been refactored from being one huge file to being
- a library of its own. Internally, it uses a customized IRBuilder to clean
- it up and simplify it.</li>
-
-<li>The optimal edge profiling pass is reliable and much more complete than in
- 2.6. It can be used with the llvm-prof tool but isn't wired up to the
- llvm-gcc and clang command line options yet.</li>
-
-<li>A new experimental alias analysis implementation, -scev-aa, has been added.
- It uses LLVM's Scalar Evolution implementation to do symbolic analysis of
- pointer offset expressions to disambiguate pointers. It can catch a few
- cases that basicaa cannot, particularly in complex loop nests.</li>
-
-<li>The default pass ordering has been tweaked for improved optimization
- effectiveness.</li>
+<li></li>
</ul>
@@ -676,19 +391,7 @@ href="http://blog.llvm.org/2009/12/introduction-to-load-elimination-in-gvn.html"
<div class="doc_text">
<ul>
-<li>The JIT now supports generating debug information and is compatible with
-the new GDB 7.0 (and later) interfaces for registering dynamically generated
-debug info.</li>
-
-<li>The JIT now <a href="http://llvm.org/PR5184">defaults
-to compiling eagerly</a> to avoid a race condition in the lazy JIT.
-Clients that still want the lazy JIT can switch it on by calling
-<tt>ExecutionEngine::DisableLazyCompilation(false)</tt>.</li>
-
-<li>It is now possible to create more than one JIT instance in the same process.
-These JITs can generate machine code in parallel,
-although <a href="http://llvm.org/docs/ProgrammersManual.html#jitthreading">you
-still have to obey the other threading restrictions</a>.</li>
+<li></li>
</ul>
@@ -706,49 +409,7 @@ infrastructure, which allows us to implement more aggressive algorithms and make
it run faster:</p>
<ul>
-<li>The 'llc -asm-verbose' option (which is now the default) has been enhanced
- to emit many useful comments to .s files indicating information about spill
- slots and loop nest structure. This should make it much easier to read and
- understand assembly files. This is wired up in llvm-gcc and clang to
- the <tt>-fverbose-asm</tt> option.</li>
-
-<li>New LSR with "full strength reduction" mode, which can reduce address
- register pressure in loops where address generation is important.</li>
-
-<li>A new codegen level Common Subexpression Elimination pass (MachineCSE)
- is available and enabled by default. It catches redundancies exposed by
- lowering.</li>
-<li>A new pre-register-allocation tail duplication pass is available and enabled
- by default, it can substantially improve branch prediction quality in some
- cases.</li>
-<li>A new sign and zero extension optimization pass (OptimizeExtsPass)
- is available and enabled by default. This pass can takes advantage
- architecture features like x86-64 implicit zero extension behavior and
- sub-registers.</li>
-<li>The code generator now supports a mode where it attempts to preserve the
- order of instructions in the input code. This is important for source that
- is hand scheduled and extremely sensitive to scheduling. It is compatible
- with the GCC <tt>-fno-schedule-insns</tt> option.</li>
-<li>The target-independent code generator now supports generating code with
- arbitrary numbers of result values. Returning more values than was
- previously supported is handled by returning through a hidden pointer. In
- 2.7, only the X86 and XCore targets have adopted support for this
- though.</li>
-<li>The code generator now supports generating code that follows the
- <a href="LangRef.html#callingconv">Glasgow Haskell Compiler Calling
- Convention</a> and ABI.</li>
-<li>The "<a href="CodeGenerator.html#selectiondag_select">DAG instruction
- selection</a>" phase of the code generator has been largely rewritten for
- 2.7. Previously, tblgen spit out tons of C++ code which was compiled and
- linked into the target to do the pattern matching, now it emits a much
- smaller table which is read by the target-independent code. The primary
- advantages of this approach is that the size and compile time of various
- targets is much improved. The X86 code generator shrunk by 1.5MB of code,
- for example.</li>
-<li>Almost the entire code generator has switched to emitting code through the
- MC interfaces instead of printing textually to the .s file. This led to a
- number of cleanups and speedups. In 2.7, debug an exception handling
- information does not go through MC yet.</li>
+<li>MachO writer works.</li>
</ul>
</div>
@@ -762,11 +423,9 @@ it run faster:</p>
</p>
<ul>
-<li>The X86 backend now optimizes tails calls much more aggressively for
- functions that use the standard C calling convention.</li>
-<li>The X86 backend now models scalar SSE registers as subregs of the SSE vector
- registers, making the code generator more aggressive in cases where scalars
- and vector types are mixed.</li>
+<li>The X86 backend now supports holding X87 floating point stack values
+ in registers across basic blocks, dramatically improving performance of code
+ that uses long double, and when targetting CPUs that don't support SSE.</li>
</ul>
@@ -783,27 +442,7 @@ it run faster:</p>
<ul>
-<li>The ARM backend now generates instructions in unified assembly syntax.</li>
-
-<li>llvm-gcc now has complete support for the ARM v7 NEON instruction set. This
- support differs slightly from the GCC implementation. Please see the
- <a
-href="http://blog.llvm.org/2010/04/arm-advanced-simd-neon-intrinsics-and.html">
- ARM Advanced SIMD (NEON) Intrinsics and Types in LLVM Blog Post</a> for
- helpful information if migrating code from GCC to LLVM-GCC.</li>
-
-<li>The ARM and Thumb code generators now use register scavenging for stack
- object address materialization. This allows the use of R3 as a general
- purpose register in Thumb1 code, as it was previous reserved for use in
- stack address materialization. Secondly, sequential uses of the same
- value will now re-use the materialized constant.</li>
-
-<li>The ARM backend now has good support for ARMv4 targets and has been tested
- on StrongARM hardware. Previously, LLVM only supported ARMv4T and
- newer chips.</li>
-
-<li>Atomic builtins are now supported for ARMv6 and ARMv7 (__sync_synchronize,
- __sync_fetch_and_add, etc.).</li>
+<li></li>
</ul>
@@ -822,34 +461,7 @@ href="http://blog.llvm.org/2010/04/arm-advanced-simd-neon-intrinsics-and.html">
</p>
<ul>
-<li>The optimizer uses the new CodeMetrics class to measure the size of code.
- Various passes (like the inliner, loop unswitcher, etc) all use this to make
- more accurate estimates of the code size impact of various
- optimizations.</li>
-<li>A new <a href="http://llvm.org/doxygen/InstructionSimplify_8h-source.html">
- llvm/Analysis/InstructionSimplify.h</a> interface is available for doing
- symbolic simplification of instructions (e.g. <tt>a+0</tt> -&gt; <tt>a</tt>)
- without requiring the instruction to exist. This centralizes a lot of
- ad-hoc symbolic manipulation code scattered in various passes.</li>
-<li>The optimizer now uses a new <a
- href="http://llvm.org/doxygen/SSAUpdater_8h-source.html">SSAUpdater</a>
- class which efficiently supports
- doing unstructured SSA update operations. This centralized a bunch of code
- scattered throughout various passes (e.g. jump threading, lcssa,
- loop rotate, etc) for doing this sort of thing. The code generator has a
- similar <a href="http://llvm.org/doxygen/MachineSSAUpdater_8h-source.html">
- MachineSSAUpdater</a> class.</li>
-<li>The <a href="http://llvm.org/doxygen/Regex_8h-source.html">
- llvm/Support/Regex.h</a> header exposes a platform independent regular
- expression API. Building on this, the <a
- href="TestingGuide.html#FileCheck">FileCheck</a> utility now supports
- regular exressions.</li>
-<li>raw_ostream now supports a circular "debug stream" accessed with "dbgs()".
- By default, this stream works the same way as "errs()", but if you pass
- <tt>-debug-buffer-size=1000</tt> to opt, the debug stream is capped to a
- fixed sized circular buffer and the output is printed at the end of the
- program's execution. This is helpful if you have a long lived compiler
- process and you're interested in seeing snapshots in time.</li>
+<li></li>
</ul>
@@ -864,16 +476,7 @@ href="http://blog.llvm.org/2010/04/arm-advanced-simd-neon-intrinsics-and.html">
<p>Other miscellaneous features include:</p>
<ul>
-<li>You can now build LLVM as a big dynamic library (e.g. "libllvm2.7.so"). To
- get this, configure LLVM with the --enable-shared option.</li>
-
-<li>LLVM command line tools now overwrite their output by default. Previously,
- they would only do this with -f. This makes them more convenient to use, and
- behave more like standard unix tools.</li>
-
-<li>The opt and llc tools now autodetect whether their input is a .ll or .bc
- file, and automatically do the right thing. This means you don't need to
- explicitly use the llvm-as tool for most things.</li>
+<li></li>
</ul>
</div>
@@ -887,48 +490,21 @@ href="http://blog.llvm.org/2010/04/arm-advanced-simd-neon-intrinsics-and.html">
<div class="doc_text">
<p>If you're already an LLVM user or developer with out-of-tree changes based
-on LLVM 2.6, this section lists some "gotchas" that you may run into upgrading
+on LLVM 2.7, this section lists some "gotchas" that you may run into upgrading
from the previous release.</p>
<ul>
-
-<li>
-The Andersen's alias analysis ("anders-aa") pass, the Predicate Simplifier
-("predsimplify") pass, the LoopVR pass, the GVNPRE pass, and the random sampling
-profiling ("rsprofiling") passes have all been removed. They were not being
-actively maintained and had substantial problems. If you are interested in
-these components, you are welcome to ressurect them from SVN, fix the
-correctness problems, and resubmit them to mainline.</li>
-
-<li>LLVM now defaults to building most libraries with RTTI turned off, providing
-a code size reduction. Packagers who are interested in building LLVM to support
-plugins that require RTTI information should build with "make REQUIRE_RTTI=1"
-and should read the new <a href="Packaging.html">Advice on Packaging LLVM</a>
-document.</li>
-
-<li>The LLVM interpreter now defaults to <em>not</em> using <tt>libffi</tt> even
-if you have it installed. This makes it more likely that an LLVM built on one
-system will work when copied to a similar system. To use <tt>libffi</tt>,
-configure with <tt>--enable-libffi</tt>.</li>
-
-<li>Debug information uses a completely different representation, an LLVM 2.6
-.bc file should work with LLVM 2.7, but debug info won't come forward.</li>
-
-<li>The LLVM 2.6 (and earlier) "malloc" and "free" instructions got removed,
- along with LowerAllocations pass. Now you should just use a call to the
- malloc and free functions in libc. These calls are optimized as well as
- the old instructions were.</li>
+<li>.ll file doesn't produce #uses comments anymore, to get them, run a .bc file
+ through "llvm-dis --show-annotations".</li>
+<li>MSIL Backend removed.</li>
+<li>ABCD and SSI passes removed.</li>
+<li>'Union' LLVM IR feature removed.</li>
</ul>
<p>In addition, many APIs have changed in this release. Some of the major LLVM
API changes are:</p>
<ul>
-
-<li>The <tt>add</tt>, <tt>sub</tt>, and <tt>mul</tt> instructions no longer
-support floating-point operands. The <tt>fadd</tt>, <tt>fsub</tt>, and
-<tt>fmul</tt> instructions should be used for this purpose instead.</li>
-
</ul>
</div>
@@ -985,6 +561,9 @@ See: <a href="GettingStarted.html#brokengcc">Broken versions of GCC and other to
However, A <a href="http://pkg.auroraux.org/GCC">Modern GCC Build</a>
for x86/x86-64 has been made available from the third party AuroraUX Project
that has been meticulously tested for bootstrapping LLVM &amp; Clang.</li>
+<li>There have been reports of Solaris and/or OpenSolaris build failures due
+to an incompatibility in the nm program as well. The nm from binutils does seem
+to work.</li>
</ul>
</div>
@@ -1004,11 +583,10 @@ components, please contact us on the <a
href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
<ul>
-<li>The MSIL, Alpha, SPU, MIPS, PIC16, Blackfin, MSP430, SystemZ and MicroBlaze
+<li>The Alpha, SPU, MIPS, PIC16, Blackfin, MSP430, SystemZ and MicroBlaze
backends are experimental.</li>
<li><tt>llc</tt> "<tt>-filetype=asm</tt>" (the default) is the only
- supported value for this option. The MachO writer is experimental, and
- works much better in mainline SVN.</li>
+ supported value for this option. XXX Update me</li>
</ul>
</div>
@@ -1025,8 +603,6 @@ href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
all <a href="http://llvm.org/PR879">inline assembly that uses the X86
floating point stack</a>. It supports the 'f' and 't' constraints, but not
'u'.</li>
- <li>The X86 backend generates inefficient floating point code when configured
- to generate code for systems that don't have SSE2.</li>
<li>Win64 code generation wasn't widely tested. Everything should work, but we
expect small issues to happen. Also, llvm-gcc cannot build the mingw64
runtime currently due to lack of support for the 'u' inline assembly
@@ -1230,7 +806,7 @@ lists</a>.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+ Last modified: $Date: 2010-09-03 01:22:50 +0200 (Fri, 03 Sep 2010) $
</address>
</body>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index 3149f46cb5c5..c7353ebb0d43 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -16,22 +16,22 @@
<li><a href="#requirements">Requirements</a></li>
<li><a href="#org">LLVM testing infrastructure organization</a>
<ul>
- <li><a href="#dejagnu">DejaGNU tests</a></li>
+ <li><a href="#regressiontests">Regression tests</a></li>
<li><a href="#testsuite">Test suite</a></li>
</ul>
</li>
<li><a href="#quick">Quick start</a>
<ul>
- <li><a href="#quickdejagnu">DejaGNU tests</a></li>
+ <li><a href="#quickregressiontests">Regression tests</a></li>
<li><a href="#quicktestsuite">Test suite</a></li>
</ul>
</li>
- <li><a href="#dgstructure">DejaGNU structure</a>
+ <li><a href="#rtstructure">Regression test structure</a>
<ul>
- <li><a href="#dgcustom">Writing new DejaGNU tests</a></li>
+ <li><a href="#rtcustom">Writing new regression tests</a></li>
<li><a href="#FileCheck">The FileCheck utility</a></li>
- <li><a href="#dgvars">Variables and substitutions</a></li>
- <li><a href="#dgfeatures">Other features</a></li>
+ <li><a href="#rtvars">Variables and substitutions</a></li>
+ <li><a href="#rtfeatures">Other features</a></li>
</ul>
</li>
<li><a href="#testsuitestructure">Test suite structure</a></li>
@@ -43,12 +43,10 @@
<li><a href="#testsuitecustom">Writing custom tests for llvm-test</a></li>
</ul>
</li>
- <li><a href="#nightly">Running the nightly tester</a></li>
</ol>
<div class="doc_author">
- <p>Written by John T. Criswell, <a
- href="http://llvm.x10sys.com/rspencer">Reid Spencer</a>, and Tanya Lattner</p>
+ <p>Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner</p>
</div>
<!--=========================================================================-->
@@ -57,9 +55,9 @@
<div class="doc_text">
-<p>This document is the reference manual for the LLVM testing infrastructure. It documents
-the structure of the LLVM testing infrastructure, the tools needed to use it,
-and how to add and run tests.</p>
+<p>This document is the reference manual for the LLVM testing infrastructure. It
+documents the structure of the LLVM testing infrastructure, the tools needed to
+use it, and how to add and run tests.</p>
</div>
@@ -69,17 +67,9 @@ and how to add and run tests.</p>
<div class="doc_text">
-<p>In order to use the LLVM testing infrastructure, you will need all of the software
-required to build LLVM, plus the following:</p>
-
-<dl>
-<dt><a href="http://www.gnu.org/software/dejagnu/">DejaGNU</a></dt>
-<dd>The Feature and Regressions tests are organized and run by DejaGNU.</dd>
-<dt><a href="http://expect.nist.gov/">Expect</a></dt>
-<dd>Expect is required by DejaGNU.</dd>
-<dt><a href="http://www.tcl.tk/software/tcltk/">tcl</a></dt>
-<dd>Tcl is required by DejaGNU. </dd>
-</dl>
+<p>In order to use the LLVM testing infrastructure, you will need all of the
+software required to build LLVM, as well
+as <a href="http://python.org">Python</a> 2.4 or later.</p>
</div>
@@ -89,29 +79,28 @@ required to build LLVM, plus the following:</p>
<div class="doc_text">
-<p>The LLVM testing infrastructure contains two major categories of tests: code
-fragments and whole programs. Code fragments are referred to as the "DejaGNU
-tests" and are in the <tt>llvm</tt> module in subversion under the
-<tt>llvm/test</tt> directory. The whole programs tests are referred to as the
-"Test suite" and are in the <tt>test-suite</tt> module in subversion.
+<p>The LLVM testing infrastructure contains two major categories of tests:
+regression tests and whole programs. The regression tests are contained inside
+the LLVM repository itself under <tt>llvm/test</tt> and are expected to always
+pass -- they should be run before every commit. The whole programs tests are
+referred to as the "LLVM test suite" and are in the <tt>test-suite</tt> module
+in subversion.
</p>
</div>
<!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="dejagnu">DejaGNU tests</a></div>
+<div class="doc_subsection"><a name="regressiontests">Regression tests</a></div>
<!-- _______________________________________________________________________ -->
<div class="doc_text">
-<p>Code fragments are small pieces of code that test a specific
-feature of LLVM or trigger a specific bug in LLVM. They are usually
-written in LLVM assembly language, but can be written in other
-languages if the test targets a particular language front end (and the
-appropriate <tt>--with-llvmgcc</tt> options were used
-at <tt>configure</tt> time of the <tt>llvm</tt> module). These tests
-are driven by the DejaGNU testing framework, which is hidden behind a
-few simple makefiles.</p>
+<p>The regression tests are small pieces of code that test a specific feature of
+LLVM or trigger a specific bug in LLVM. They are usually written in LLVM
+assembly language, but can be written in other languages if the test targets a
+particular language front end (and the appropriate <tt>--with-llvmgcc</tt>
+options were used at <tt>configure</tt> time of the <tt>llvm</tt> module). These
+tests are driven by the 'lit' testing tool, which is part of LLVM.</p>
<p>These code fragments are not complete programs. The code generated
from them is never executed to determine correct behavior.</p>
@@ -158,8 +147,8 @@ generates code.</p>
<div class="doc_text">
- <p>The tests are located in two separate Subversion modules. The
- DejaGNU tests are in the main "llvm" module under the directory
+ <p>The tests are located in two separate Subversion modules. The regressions
+ tests are in the main "llvm" module under the directory
<tt>llvm/test</tt> (so you get these tests for free with the main llvm tree).
The more comprehensive test suite that includes whole
programs in C and C++ is in the <tt>test-suite</tt> module. This module should
@@ -171,10 +160,10 @@ the <tt>test-suite</tt> directory will be automatically configured.
Alternatively, you can configure the <tt>test-suite</tt> module manually.</p>
<!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="quickdejagnu">DejaGNU tests</a></div>
+<div class="doc_subsection"><a name="quickregressiontests">Regression tests</a></div>
<!-- _______________________________________________________________________ -->
-<p>To run all of the simple tests in LLVM using DejaGNU, use the master Makefile
- in the <tt>llvm/test</tt> directory:</p>
+<p>To run all of the LLVM regression tests, use master Makefile in
+ the <tt>llvm/test</tt> directory:</p>
<div class="doc_code">
<pre>
@@ -190,38 +179,47 @@ Alternatively, you can configure the <tt>test-suite</tt> module manually.</p>
</pre>
</div>
-<p>To run only a subdirectory of tests in <tt>llvm/test</tt> using DejaGNU (ie.
-Transforms), just set the TESTSUITE variable to the path of the
-subdirectory (relative to <tt>llvm/test</tt>):</p>
+<p>If you have <a href="http://clang.llvm.org">Clang</a> checked out and built,
+you can run the LLVM and Clang tests simultaneously using:</p>
+
+<p>or</p>
<div class="doc_code">
<pre>
-% gmake TESTSUITE=Transforms check
+% gmake check-all
</pre>
</div>
-<p><b>Note: If you are running the tests with <tt>objdir != subdir</tt>, you
-must have run the complete testsuite before you can specify a
-subdirectory.</b></p>
+<p>To run the tests with Valgrind (Memcheck by default), just append
+<tt>VG=1</tt> to the commands above, e.g.:</p>
-<p>To run only a single test, set <tt>TESTONE</tt> to its path (relative to
-<tt>llvm/test</tt>) and make the <tt>check-one</tt> target:</p>
+<div class="doc_code">
+<pre>
+% gmake check VG=1
+</pre>
+</div>
+
+<p>To run individual tests or subsets of tests, you can use the 'llvm-lit'
+script which is built as part of LLVM. For example, to run the
+'Integer/BitCast.ll' test by itself you can run:</p>
<div class="doc_code">
<pre>
-% gmake TESTONE=Feature/basictest.ll check-one
+% llvm-lit ~/llvm/test/Integer/BitCast.ll
</pre>
</div>
-<p>To run the tests with Valgrind (Memcheck by default), just append
-<tt>VG=1</tt> to the commands above, e.g.:</p>
+<p>or to run all of the ARM CodeGen tests:</p>
<div class="doc_code">
<pre>
-% gmake check VG=1
+% llvm-lit ~/llvm/test/CodeGen/ARM
</pre>
</div>
+<p>For more information on using the 'lit' tool, see 'llvm-lit --help' or the
+'lit' man page.</p>
+
<!-- _______________________________________________________________________ -->
<div class="doc_subsection"><a name="quicktestsuite">Test suite</a></div>
<!-- _______________________________________________________________________ -->
@@ -275,11 +273,11 @@ that subdirectory.</p>
</div>
<!--=========================================================================-->
-<div class="doc_section"><a name="dgstructure">DejaGNU structure</a></div>
+<div class="doc_section"><a name="rtstructure">Regression test structure</a></div>
<!--=========================================================================-->
<div class="doc_text">
- <p>The LLVM DejaGNU tests are driven by DejaGNU together with GNU Make and are
- located in the <tt>llvm/test</tt> directory.
+ <p>The LLVM regression tests are driven by 'lit' and are located in
+ the <tt>llvm/test</tt> directory.
<p>This directory contains a large array of small tests
that exercise various features of LLVM and to ensure that regressions do not
@@ -302,23 +300,24 @@ that subdirectory.</p>
</div>
<!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="dgcustom">Writing new DejaGNU tests</a></div>
+<div class="doc_subsection"><a name="rtcustom">Writing new regression tests</a></div>
<!-- _______________________________________________________________________ -->
<div class="doc_text">
- <p>The DejaGNU structure is very simple, but does require some information to
- be set. This information is gathered via <tt>configure</tt> and is written
- to a file, <tt>site.exp</tt> in <tt>llvm/test</tt>. The <tt>llvm/test</tt>
- Makefile does this work for you.</p>
-
- <p>In order for DejaGNU to work, each directory of tests must have a
- <tt>dg.exp</tt> file. DejaGNU looks for this file to determine how to run the
- tests. This file is just a Tcl script and it can do anything you want, but
- we've standardized it for the LLVM regression tests. If you're adding a
+ <p>The regression test structure is very simple, but does require some
+ information to be set. This information is gathered via <tt>configure</tt> and
+ is written to a file, <tt>lit.site.cfg</tt>
+ in <tt>llvm/test</tt>. The <tt>llvm/test</tt> Makefile does this work for
+ you.</p>
+
+ <p>In order for the regression tests to work, each directory of tests must
+ have a <tt>dg.exp</tt> file. Lit looks for this file to determine how to
+ run the tests. This file is just a Tcl script and it can do anything you want,
+ but we've standardized it for the LLVM regression tests. If you're adding a
directory of tests, just copy <tt>dg.exp</tt> from another directory to get
- running. The standard <tt>dg.exp</tt> simply loads a Tcl
- library (<tt>test/lib/llvm.exp</tt>) and calls the <tt>llvm_runtests</tt>
- function defined in that library with a list of file names to run. The names
- are obtained by using Tcl's glob command. Any directory that contains only
+ running. The standard <tt>dg.exp</tt> simply loads a Tcl library
+ (<tt>test/lib/llvm.exp</tt>) and calls the <tt>llvm_runtests</tt> function
+ defined in that library with a list of file names to run. The names are
+ obtained by using Tcl's glob command. Any directory that contains only
directories does not need the <tt>dg.exp</tt> file.</p>
<p>The <tt>llvm-runtests</tt> function lookas at each file that is passed to
@@ -379,7 +378,8 @@ that subdirectory.</p>
<p>There are some quoting rules that you must pay attention to when writing
your RUN lines. In general nothing needs to be quoted. Tcl won't strip off any
- ' or " so they will get passed to the invoked program. For example:</p>
+ quote characters so they will get passed to the invoked program. For
+ example:</p>
<div class="doc_code">
<pre>
@@ -696,7 +696,7 @@ define two separate CHECK lines that match on the same line.
</div>
<!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="dgvars">Variables and
+<div class="doc_subsection"><a name="rtvars">Variables and
substitutions</a></div>
<!-- _______________________________________________________________________ -->
<div class="doc_text">
@@ -792,7 +792,7 @@ substitutions</a></div>
</div>
<!-- _______________________________________________________________________ -->
-<div class="doc_subsection"><a name="dgfeatures">Other Features</a></div>
+<div class="doc_subsection"><a name="rtfeatures">Other Features</a></div>
<!-- _______________________________________________________________________ -->
<div class="doc_text">
<p>To make RUN line writing easier, there are several shell scripts located
@@ -818,7 +818,7 @@ substitutions</a></div>
<p>Sometimes it is necessary to mark a test case as "expected fail" or XFAIL.
You can easily mark a test as XFAIL just by including <tt>XFAIL: </tt> on a
line near the top of the file. This signals that the test case should succeed
- if the test fails. Such test cases are counted separately by DejaGnu. To
+ if the test fails. Such test cases are counted separately by the testing tool. To
specify an expected fail, use the XFAIL keyword in the comments of the test
program followed by a colon and one or more regular expressions (separated by
a comma). The regular expressions allow you to XFAIL the test conditionally by
@@ -906,9 +906,10 @@ benchmarks, regression tests, code that is strange grammatically, etc. These
organizations should be relatively self explanatory.</p>
<p>Some tests are known to fail. Some are bugs that we have not fixed yet;
-others are features that we haven't added yet (or may never add). In DejaGNU,
-the result for such tests will be XFAIL (eXpected FAILure). In this way, you
-can tell the difference between an expected and unexpected failure.</p>
+others are features that we haven't added yet (or may never add). In the
+regression tests, the result for such tests will be XFAIL (eXpected FAILure).
+In this way, you can tell the difference between an expected and unexpected
+failure.</p>
<p>The tests in the test suite have no such feature at this time. If the
test passes, only warnings and other miscellaneous output will be generated. If
@@ -1135,66 +1136,6 @@ example reports that can do fancy stuff.</p>
</div>
-<!--=========================================================================-->
-<div class="doc_section"><a name="nightly">Running the nightly tester</a></div>
-<!--=========================================================================-->
-
-<div class="doc_text">
-
-<p>
-The <a href="http://llvm.org/nightlytest/">LLVM Nightly Testers</a>
-automatically check out an LLVM tree, build it, run the "nightly"
-program test (described above), run all of the DejaGNU tests,
-delete the checked out tree, and then submit the results to
-<a href="http://llvm.org/nightlytest/">http://llvm.org/nightlytest/</a>.
-After test results are submitted to
-<a href="http://llvm.org/nightlytest/">http://llvm.org/nightlytest/</a>,
-they are processed and displayed on the tests page. An email to
-<a href="http://lists.cs.uiuc.edu/pipermail/llvm-testresults/">
-llvm-testresults@cs.uiuc.edu</a> summarizing the results is also generated.
-This testing scheme is designed to ensure that programs don't break as well
-as keep track of LLVM's progress over time.</p>
-
-<p>If you'd like to set up an instance of the nightly tester to run on your
-machine, take a look at the comments at the top of the
-<tt>utils/NewNightlyTest.pl</tt> file. If you decide to set up a nightly tester
-please choose a unique nickname and invoke <tt>utils/NewNightlyTest.pl</tt>
-with the "-nickname [yournickname]" command line option.
-
-<p>You can create a shell script to encapsulate the running of the script.
-The optimized x86 Linux nightly test is run from just such a script:</p>
-
-<div class="doc_code">
-<pre>
-#!/bin/bash
-BASE=/proj/work/llvm/nightlytest
-export BUILDDIR=$BASE/build
-export WEBDIR=$BASE/testresults
-export LLVMGCCDIR=/proj/work/llvm/cfrontend/install
-export PATH=/proj/install/bin:$LLVMGCCDIR/bin:$PATH
-export LD_LIBRARY_PATH=/proj/install/lib
-cd $BASE
-cp /proj/work/llvm/llvm/utils/NewNightlyTest.pl .
-nice ./NewNightlyTest.pl -nice -release -verbose -parallel -enable-linscan \
- -nickname NightlyTester -noexternals &gt; output.log 2&gt;&amp;1
-</pre>
-</div>
-
-<p>It is also possible to specify the the location your nightly test results
-are submitted. You can do this by passing the command line option
-"-submit-server [server_address]" and "-submit-script [script_on_server]" to
-<tt>utils/NewNightlyTest.pl</tt>. For example, to submit to the llvm.org
-nightly test results page, you would invoke the nightly test script with
-"-submit-server llvm.org -submit-script /nightlytest/NightlyTestAccept.cgi".
-If these options are not specified, the nightly test script sends the results
-to the llvm.org nightly test results page.</p>
-
-<p>Take a look at the <tt>NewNightlyTest.pl</tt> file to see what all of the
-flags and strings do. If you start running the nightly tests, please let us
-know. Thanks!</p>
-
-</div>
-
<!-- *********************************************************************** -->
<hr>
@@ -1204,9 +1145,9 @@ know. Thanks!</p>
<a href="http://validator.w3.org/check/referer"><img
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
- John T. Criswell, Reid Spencer, and Tanya Lattner<br>
+ John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+ Last modified: $Date: 2010-08-02 03:20:23 +0200 (Mon, 02 Aug 2010) $
</address>
</body>
</html>
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index aa2612c2e921..2dc0ef772cca 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -1299,9 +1299,6 @@ implementation in <tt>SparcInstrInfo.cpp</tt>:
</p>
<ul>
-<li><tt>isMoveInstr</tt> &mdash; Return true if the instruction is a register to
- register move; false, otherwise.</li>
-
<li><tt>isLoadFromStackSlot</tt> &mdash; If the specified machine instruction is
a direct load from a stack slot, return the register number of the
destination and the <tt>FrameIndex</tt> of the stack slot.</li>
@@ -2552,7 +2549,7 @@ with assembler.
<a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2010-07-11 19:01:17 +0200 (Sun, 11 Jul 2010) $
+ Last modified: $Date: 2010-07-17 00:35:46 +0200 (Sat, 17 Jul 2010) $
</address>
</body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index 94c5cebed2ab..1a6edcfc59f3 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -290,7 +290,7 @@ function.</p>
initialization value is not important.</p>
<div class="doc_code"><pre>
- RegisterPass&lt;Hello&gt; X("<i>hello</i>", "<i>Hello World Pass</i>",
+ INITIALIZE_PASS(Hello, "<i>hello</i>", "<i>Hello World Pass</i>",
false /* Only looks at CFG */,
false /* Analysis Pass */);
} <i>// end of anonymous namespace</i>
@@ -299,7 +299,7 @@ initialization value is not important.</p>
<p>Lastly, we <a href="#registration">register our class</a> <tt>Hello</tt>,
giving it a command line
argument "<tt>hello</tt>", and a name "<tt>Hello World Pass</tt>".
-Last two RegisterPass arguments are optional. Their default value is false.
+Last two arguments describe its behavior.
If a pass walks CFG without modifying it then third argument is set to true.
If a pass is an analysis pass, for example dominator tree pass, then true
is supplied as fourth argument. </p>
@@ -326,8 +326,9 @@ is supplied as fourth argument. </p>
};
char Hello::ID = 0;
- RegisterPass&lt;Hello&gt; X("<i>hello</i>", "<i>Hello World Pass</i>");
+ INITIALIZE_PASS(Hello, "<i>Hello</i>", "<i>Hello World Pass</i>", false, false);
}
+
</pre></div>
<p>Now that it's all together, compile the file with a simple "<tt>gmake</tt>"
@@ -348,7 +349,7 @@ them) to be useful.</p>
<p>Now that you have a brand new shiny shared object file, we can use the
<tt>opt</tt> command to run an LLVM program through your pass. Because you
-registered your pass with the <tt>RegisterPass</tt> template, you will be able to
+registered your pass with the <tt>INITIALIZE_PASS</tt> macro, you will be able to
use the <tt>opt</tt> tool to access it, once loaded.</p>
<p>To test it, follow the example at the end of the <a
@@ -966,9 +967,8 @@ remember, you may not modify the LLVM <tt>Function</tt> or its contents from a
pass registration works, and discussed some of the reasons that it is used and
what it does. Here we discuss how and why passes are registered.</p>
-<p>As we saw above, passes are registered with the <b><tt>RegisterPass</tt></b>
-template, which requires you to pass at least two
-parameters. The first parameter is the name of the pass that is to be used on
+<p>As we saw above, passes are registered with the <b><tt>INITIALIZE_PASS</tt></b>
+macro. The first parameter is the name of the pass that is to be used on
the command line to specify that the pass should be added to a program (for
example, with <tt>opt</tt> or <tt>bugpoint</tt>). The second argument is the
name of the pass, which is to be used for the <tt>-help</tt> output of
@@ -1247,7 +1247,7 @@ between passes</a> still apply.</p>
<p>Although <a href="#registration">Pass Registration</a> is optional for normal
passes, all analysis group implementations must be registered, and must use the
-<A href="#registerag"><tt>RegisterAnalysisGroup</tt></a> template to join the
+<A href="#registerag"><tt>INITIALIZE_AG_PASS</tt></a> template to join the
implementation pool. Also, a default implementation of the interface
<b>must</b> be registered with <A
href="#registerag"><tt>RegisterAnalysisGroup</tt></a>.</p>
@@ -1283,8 +1283,10 @@ hypothetical example) instead.</p>
<div class="doc_text">
<p>The <tt>RegisterAnalysisGroup</tt> template is used to register the analysis
-group itself as well as add pass implementations to the analysis group. First,
-an analysis should be registered, with a human readable name provided for it.
+group itself, while the <tt>INITIALIZE_AG_PASS</tt> is used to add pass
+implementations to the analysis group. First,
+an analysis group should be registered, with a human readable name
+provided for it.
Unlike registration of passes, there is no command line argument to be specified
for the Analysis Group Interface itself, because it is "abstract":</p>
@@ -1297,35 +1299,36 @@ implementations of the interface by using the following code:</p>
<div class="doc_code"><pre>
<b>namespace</b> {
- //<i> Analysis Group implementations <b>must</b> be registered normally...</i>
- RegisterPass&lt;FancyAA&gt;
- B("<i>somefancyaa</i>", "<i>A more complex alias analysis implementation</i>");
-
//<i> Declare that we implement the AliasAnalysis interface</i>
- RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>&gt; C(B);
+ INITIALIZE_AG_PASS(FancyAA, <a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>, "<i>somefancyaa</i>",
+ "<i>A more complex alias analysis implementation</i>",
+ false, // <i>Is CFG Only?</i>
+ true, // <i>Is Analysis?</i>
+ false, // <i>Is default Analysis Group implementation?</i>
+ );
}
</pre></div>
-<p>This just shows a class <tt>FancyAA</tt> that is registered normally, then
-uses the <tt>RegisterAnalysisGroup</tt> template to "join" the <tt><a
-href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a></tt>
+<p>This just shows a class <tt>FancyAA</tt> that
+uses the <tt>INITIALIZE_AG_PASS</tt> macro both to register and
+to "join" the <tt><a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a></tt>
analysis group. Every implementation of an analysis group should join using
-this template. A single pass may join multiple different analysis groups with
-no problem.</p>
+this macro.</p>
<div class="doc_code"><pre>
<b>namespace</b> {
- //<i> Analysis Group implementations <b>must</b> be registered normally...</i>
- RegisterPass&lt;<a href="http://llvm.org/doxygen/structBasicAliasAnalysis.html">BasicAliasAnalysis</a>&gt;
- D("<i>basicaa</i>", "<i>Basic Alias Analysis (default AA impl)</i>");
-
//<i> Declare that we implement the AliasAnalysis interface</i>
- RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>, <b>true</b>&gt; E(D);
+ INITIALIZE_AG_PASS(BasicAA, <a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>, "<i>basicaa</i>",
+ "<i>Basic Alias Analysis (default AA impl)</i>",
+ false, // <i>Is CFG Only?</i>
+ true, // <i>Is Analysis?</i>
+ true, // <i>Is default Analysis Group implementation?</i>
+ );
}
</pre></div>
-<p>Here we show how the default implementation is specified (using the extra
-argument to the <tt>RegisterAnalysisGroup</tt> template). There must be exactly
+<p>Here we show how the default implementation is specified (using the final
+argument to the <tt>INITIALIZE_AG_PASS</tt> template). There must be exactly
one default implementation available at all times for an Analysis Group to be
used. Only default implementation can derive from <tt>ImmutablePass</tt>.
Here we declare that the
@@ -1830,7 +1833,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-07-08 10:27:18 +0200 (Thu, 08 Jul 2010) $
+ Last modified: $Date: 2010-07-22 01:07:00 +0200 (Thu, 22 Jul 2010) $
</address>
</body>
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
index 783abb33ed77..6cd33b010adc 100644
--- a/docs/tutorial/LangImpl3.html
+++ b/docs/tutorial/LangImpl3.html
@@ -222,7 +222,7 @@ code, we do a simple switch on the opcode to create the right LLVM instruction.
<p>In the example above, the LLVM builder class is starting to show its value.
IRBuilder knows where to insert the newly created instruction, all you have to
-do is specify what instruction to create (e.g. with <tt>CreateAdd</tt>), which
+do is specify what instruction to create (e.g. with <tt>CreateFAdd</tt>), which
operands to use (<tt>L</tt> and <tt>R</tt> here) and optionally provide a name
for the generated instruction.</p>
@@ -1054,9 +1054,9 @@ Value *BinaryExprAST::Codegen() {
if (L == 0 || R == 0) return 0;
switch (Op) {
- case '+': return Builder.CreateAdd(L, R, "addtmp");
- case '-': return Builder.CreateSub(L, R, "subtmp");
- case '*': return Builder.CreateMul(L, R, "multmp");
+ case '+': return Builder.CreateFAdd(L, R, "addtmp");
+ case '-': return Builder.CreateFSub(L, R, "subtmp");
+ case '*': return Builder.CreateFMul(L, R, "multmp");
case '&lt;':
L = Builder.CreateFCmpULT(L, R, "cmptmp");
// Convert bool 0/1 to double 0.0 or 1.0
@@ -1263,7 +1263,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $
+ Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index b6398ca1d650..4450f2e3a11a 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -800,7 +800,7 @@ references to it will naturally find it in the symbol table.</p>
StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
}
- Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+ Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
</pre>
</div>
@@ -1517,7 +1517,7 @@ Value *ForExprAST::Codegen() {
StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
}
- Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+ Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
// Compute the end condition.
Value *EndCond = End-&gt;Codegen();
@@ -1771,7 +1771,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $
+ Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index 7368ea7221d9..c6a0b8a7d603 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -1540,7 +1540,7 @@ Value *ForExprAST::Codegen() {
StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
}
- Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+ Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
// Compute the end condition.
Value *EndCond = End-&gt;Codegen();
@@ -1808,7 +1808,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-06-21 22:31:30 +0200 (Mon, 21 Jun 2010) $
+ Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index be2d69e8f55f..1ec99b15bf5c 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -480,7 +480,7 @@ the unabridged code):</p>
<b>// Reload, increment, and restore the alloca. This handles the case where
// the body of the loop mutates the variable.
Value *CurVar = Builder.CreateLoad(Alloca);
- Value *NextVar = Builder.CreateAdd(CurVar, StepVal, "nextvar");
+ Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
Builder.CreateStore(NextVar, Alloca);</b>
...
</pre>
@@ -1833,7 +1833,7 @@ Value *ForExprAST::Codegen() {
// Reload, increment, and restore the alloca. This handles the case where
// the body of the loop mutates the variable.
Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
- Value *NextVar = Builder.CreateAdd(CurVar, StepVal, "nextvar");
+ Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
Builder.CreateStore(NextVar, Alloca);
// Convert condition to a bool by comparing equal to 0.0.
@@ -2158,7 +2158,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $
+ Last modified: $Date: 2010-09-01 22:09:20 +0200 (Wed, 01 Sep 2010) $
</address>
</body>
</html>
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index a443ad420ee5..8536915993ef 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -54,10 +54,10 @@ void BrainF::header(LLVMContext& C) {
//Function prototypes
- //declare void @llvm.memset.i32(i8 *, i8, i32, i32)
- const Type *Tys[] = { Type::getInt32Ty(C) };
+ //declare void @llvm.memset.p0i8.i32(i8 *, i8, i32, i32, i1)
+ const Type *Tys[] = { Type::getInt8PtrTy(C), Type::getInt32Ty(C) };
Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset,
- Tys, 1);
+ Tys, 2);
//declare i32 @getchar()
getchar_func = cast<Function>(module->
@@ -88,13 +88,14 @@ void BrainF::header(LLVMContext& C) {
NULL, "arr");
BB->getInstList().push_back(cast<Instruction>(ptr_arr));
- //call void @llvm.memset.i32(i8 *%arr, i8 0, i32 %d, i32 1)
+ //call void @llvm.memset.p0i8.i32(i8 *%arr, i8 0, i32 %d, i32 1, i1 0)
{
Value *memset_params[] = {
ptr_arr,
ConstantInt::get(C, APInt(8, 0)),
val_mem,
- ConstantInt::get(C, APInt(32, 1))
+ ConstantInt::get(C, APInt(32, 1)),
+ ConstantInt::get(C, APInt(1, 0))
};
CallInst *memset_call = builder->
diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp
index 353e17380c6c..a7bbf8c72684 100644
--- a/examples/Fibonacci/fibonacci.cpp
+++ b/examples/Fibonacci/fibonacci.cpp
@@ -96,17 +96,22 @@ int main(int argc, char **argv) {
LLVMContext Context;
// Create some module to put our function into it.
- Module *M = new Module("test", Context);
+ OwningPtr<Module> M(new Module("test", Context));
// We are about to create the "fib" function:
- Function *FibF = CreateFibFunction(M, Context);
+ Function *FibF = CreateFibFunction(M.get(), Context);
// Now we going to create JIT
std::string errStr;
- ExecutionEngine *EE = EngineBuilder(M).setErrorStr(&errStr).setEngineKind(EngineKind::JIT).create();
+ ExecutionEngine *EE =
+ EngineBuilder(M.get())
+ .setErrorStr(&errStr)
+ .setEngineKind(EngineKind::JIT)
+ .create();
if (!EE) {
- errs() << argv[0] << ": Failed to construct ExecutionEngine: " << errStr << "\n";
+ errs() << argv[0] << ": Failed to construct ExecutionEngine: " << errStr
+ << "\n";
return 1;
}
@@ -127,5 +132,6 @@ int main(int argc, char **argv) {
// import result of execution
outs() << "Result: " << GV.IntVal << "\n";
+
return 0;
}
diff --git a/examples/Makefile b/examples/Makefile
index 5594c05fa3a0..bc09b8e0473b 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -1,19 +1,20 @@
##===- examples/Makefile -----------------------------------*- Makefile -*-===##
-#
+#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
-#
+#
##===----------------------------------------------------------------------===##
LEVEL=..
include $(LEVEL)/Makefile.config
-PARALLEL_DIRS:= BrainF Fibonacci HowToUseJIT Kaleidoscope ModuleMaker
+PARALLEL_DIRS:= BrainF Fibonacci HowToUseJIT Kaleidoscope ModuleMaker \
+ TracingBrainF
ifeq ($(HAVE_PTHREAD),1)
-PARALLEL_DIRS += ParallelJIT
+PARALLEL_DIRS += ParallelJIT
endif
ifeq ($(LLVM_ON_UNIX),1)
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 117f2d6dcff4..75cee7d203a1 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -204,8 +204,7 @@ typedef enum {
LLVMPointerTypeKind, /**< Pointers */
LLVMOpaqueTypeKind, /**< Opaque: type with unknown structure */
LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */
- LLVMMetadataTypeKind, /**< Metadata */
- LLVMUnionTypeKind /**< Unions */
+ LLVMMetadataTypeKind /**< Metadata */
} LLVMTypeKind;
typedef enum {
@@ -227,7 +226,9 @@ typedef enum {
LLVMGhostLinkage, /**< Obsolete */
LLVMCommonLinkage, /**< Tentative definitions */
LLVMLinkerPrivateLinkage, /**< Like Private, but linker removes. */
- LLVMLinkerPrivateWeakLinkage /**< Like LinkerPrivate, but is weak. */
+ LLVMLinkerPrivateWeakLinkage, /**< Like LinkerPrivate, but is weak. */
+ LLVMLinkerPrivateWeakDefAutoLinkage /**< Like LinkerPrivateWeak, but possibly
+ hidden. */
} LLVMLinkage;
typedef enum {
@@ -393,13 +394,6 @@ unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy);
void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest);
LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy);
-/* Operations on union types */
-LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
- unsigned ElementCount);
-LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, unsigned ElementCount);
-unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy);
-void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest);
-
/* Operations on array, pointer, and vector types (sequence types) */
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace);
@@ -523,6 +517,8 @@ LLVMValueRef LLVMGetUsedValue(LLVMUseRef U);
/* Operations on Users */
LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index);
+void LLVMSetOperand(LLVMValueRef User, unsigned Index, LLVMValueRef Val);
+int LLVMGetNumOperands(LLVMValueRef Val);
/* Operations on constants of any type */
LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */
@@ -570,7 +566,6 @@ LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
LLVMBool Packed);
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size);
-LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val);
/* Constant expressions */
LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal);
@@ -750,6 +745,9 @@ LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB,
const char *Name);
void LLVMDeleteBasicBlock(LLVMBasicBlockRef BB);
+void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos);
+void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos);
+
/* Operations on instructions */
LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst);
LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB);
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h
index ebb2cf8a82dd..d177381988df 100644
--- a/include/llvm-c/EnhancedDisassembly.h
+++ b/include/llvm-c/EnhancedDisassembly.h
@@ -51,41 +51,38 @@ typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
@typedef EDAssemblySyntax_t
An assembly syntax for use in tokenizing instructions.
*/
-typedef enum {
+enum {
/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
kEDAssemblySyntaxX86Intel = 0,
/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
kEDAssemblySyntaxX86ATT = 1,
kEDAssemblySyntaxARMUAL = 2
-} EDAssemblySyntax_t;
+};
+typedef unsigned EDAssemblySyntax_t;
/*!
@typedef EDDisassemblerRef
Encapsulates a disassembler for a single CPU architecture.
*/
-struct EDDisassembler;
-typedef struct EDDisassembler *EDDisassemblerRef;
+typedef void *EDDisassemblerRef;
/*!
@typedef EDInstRef
Encapsulates a single disassembled instruction in one assembly syntax.
*/
-struct EDInst;
-typedef struct EDInst *EDInstRef;
+typedef void *EDInstRef;
/*!
@typedef EDTokenRef
Encapsulates a token from the disassembly of an instruction.
*/
-struct EDToken;
-typedef struct EDToken *EDTokenRef;
+typedef void *EDTokenRef;
/*!
@typedef EDOperandRef
Encapsulates an operand of an instruction.
*/
-struct EDOperand;
-typedef struct EDOperand *EDOperandRef;
+typedef void *EDOperandRef;
/*!
@functiongroup Getting a disassembler
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index 5a98a77c5bd2..f5f40619ef0d 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -116,6 +116,8 @@ LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
LLVMValueRef *OutFn);
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn);
+
LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE);
void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index b1b9f36fb2b9..2cd15c3fa30f 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -1,26 +1,26 @@
-/*===-- llvm-c/Target.h - Target Lib C Iface --------------------*- C++ -*-===*\
-|* *|
-|* The LLVM Compiler Infrastructure *|
-|* *|
-|* This file is distributed under the University of Illinois Open Source *|
-|* License. See LICENSE.TXT for details. *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMTarget.a, which *|
-|* implements target information. *|
-|* *|
-|* Many exotic languages can interoperate with C code but have a harder time *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
+/*===-- llvm-c/Target.h - Target Lib C Iface --------------------*- C++ -*-===*/
+/* */
+/* The LLVM Compiler Infrastructure */
+/* */
+/* This file is distributed under the University of Illinois Open Source */
+/* License. See LICENSE.TXT for details. */
+/* */
+/*===----------------------------------------------------------------------===*/
+/* */
+/* This header declares the C interface to libLLVMTarget.a, which */
+/* implements target information. */
+/* */
+/* Many exotic languages can interoperate with C code but have a harder time */
+/* with C++ due to name mangling. So in addition to C, this interface enables */
+/* tools written in such languages. */
+/* */
+/*===----------------------------------------------------------------------===*/
#ifndef LLVM_C_TARGET_H
#define LLVM_C_TARGET_H
#include "llvm-c/Core.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#ifdef __cplusplus
extern "C" {
@@ -64,15 +64,10 @@ static inline void LLVMInitializeAllTargets(void) {
for JIT applications to ensure that the target gets linked in correctly. */
static inline LLVMBool LLVMInitializeNativeTarget(void) {
/* If we have a native target, initialize it to ensure it is linked in. */
-#ifdef LLVM_NATIVE_ARCH
-#define DoInit2(TARG) \
- LLVMInitialize ## TARG ## Info (); \
- LLVMInitialize ## TARG ()
-#define DoInit(T) DoInit2(T)
- DoInit(LLVM_NATIVE_ARCH);
+#ifdef LLVM_NATIVE_TARGET
+ LLVM_NATIVE_TARGETINFO();
+ LLVM_NATIVE_TARGET();
return 0;
-#undef DoInit
-#undef DoInit2
#else
return 1;
#endif
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 93f37605a41b..e6f69afa2ec2 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -18,6 +18,7 @@
#include <stdbool.h>
#include <stddef.h>
+#include "llvm/System/DataTypes.h"
#define LTO_API_VERSION 3
@@ -135,11 +136,17 @@ lto_module_dispose(lto_module_t mod);
extern const char*
lto_module_get_target_triple(lto_module_t mod);
+/**
+ * Sets triple string with which the object will be codegened.
+ */
+extern void
+lto_module_set_target_triple(lto_module_t mod, const char *triple);
+
/**
* Returns the number of symbols in the object module.
*/
-extern unsigned int
+extern uint32_t
lto_module_get_num_symbols(lto_module_t mod);
@@ -147,14 +154,14 @@ lto_module_get_num_symbols(lto_module_t mod);
* Returns the name of the ith symbol in the object module.
*/
extern const char*
-lto_module_get_symbol_name(lto_module_t mod, unsigned int index);
+lto_module_get_symbol_name(lto_module_t mod, uint32_t index);
/**
* Returns the attributes of the ith symbol in the object module.
*/
extern lto_symbol_attributes
-lto_module_get_symbol_attribute(lto_module_t mod, unsigned int index);
+lto_module_get_symbol_attribute(lto_module_t mod, uint32_t index);
/**
@@ -200,11 +207,10 @@ lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model);
/**
- * Sets the location of the "gcc" to run. If not set, libLTO will search for
- * "gcc" on the path.
+ * Sets the cpu to generate code for.
*/
extern void
-lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path);
+lto_codegen_set_cpu(lto_code_gen_t cg, const char *cpu);
/**
@@ -214,6 +220,12 @@ lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path);
extern void
lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path);
+/**
+ * Sets extra arguments that libLTO should pass to the assembler.
+ */
+extern void
+lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args,
+ int nargs);
/**
* Adds to a list of all global symbols that must exist in the final
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 59e023b05abe..8004cb4b123b 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -464,7 +464,7 @@ public:
// For small values, return quickly
if (numBits <= APINT_BITS_PER_WORD)
return APInt(numBits, ~0ULL << shiftAmt);
- return (~APInt(numBits, 0)).shl(shiftAmt);
+ return getAllOnesValue(numBits).shl(shiftAmt);
}
/// Constructs an APInt value that has the bottom loBitsSet bits set.
@@ -481,7 +481,7 @@ public:
// For small values, return quickly.
if (numBits < APINT_BITS_PER_WORD)
return APInt(numBits, (1ULL << loBitsSet) - 1);
- return (~APInt(numBits, 0)).lshr(numBits - loBitsSet);
+ return getAllOnesValue(numBits).lshr(numBits - loBitsSet);
}
/// The hash value is computed as the sum of the words and the bit width.
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index c53e255e1c7c..06a1575da4d0 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -185,13 +185,12 @@ public:
++NumTombstones;
return true;
}
- bool erase(iterator I) {
+ void erase(iterator I) {
BucketT *TheBucket = &*I;
TheBucket->second.~ValueT();
TheBucket->first = getTombstoneKey();
--NumEntries;
++NumTombstones;
- return true;
}
void swap(DenseMap& RHS) {
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index 938833866fcd..00bcf64a2fc7 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -58,6 +58,7 @@ public:
class Iterator {
typename MapTy::iterator I;
+ friend class DenseSet;
public:
typedef typename MapTy::iterator::difference_type difference_type;
typedef ValueT value_type;
@@ -77,6 +78,7 @@ public:
class ConstIterator {
typename MapTy::const_iterator I;
+ friend class DenseSet;
public:
typedef typename MapTy::const_iterator::difference_type difference_type;
typedef ValueT value_type;
@@ -103,6 +105,10 @@ public:
const_iterator begin() const { return ConstIterator(TheMap.begin()); }
const_iterator end() const { return ConstIterator(TheMap.end()); }
+ iterator find(const ValueT &V) { return Iterator(TheMap.find(V)); }
+ void erase(Iterator I) { return TheMap.erase(I.I); }
+ void erase(ConstIterator CI) { return TheMap.erase(CI.I); }
+
std::pair<iterator, bool> insert(const ValueT &V) {
return TheMap.insert(std::make_pair(V, 0));
}
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 5f2df2a17e41..b9e5cbdf8c6b 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -183,6 +183,16 @@ public:
inline bool nodeVisited(NodeType *Node) const {
return this->Visited.count(Node) != 0;
}
+
+ /// getPathLength - Return the length of the path from the entry node to the
+ /// current node, counting both nodes.
+ unsigned getPathLength() const { return VisitStack.size(); }
+
+ /// getPath - Return the n'th node in the path from the the entry node to the
+ /// current node.
+ NodeType *getPath(unsigned n) const {
+ return VisitStack[n].first.getPointer();
+ }
};
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index fc8490abf739..662b5e273548 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -54,9 +54,9 @@ namespace llvm {
/// void Profile(FoldingSetNodeID &ID) const {
/// ID.AddString(Name);
/// ID.AddInteger(Value);
-/// }
-/// ...
-/// };
+/// }
+/// ...
+/// };
///
/// To define the folding set itself use the FoldingSet template;
///
@@ -190,26 +190,76 @@ protected:
/// GetNodeProfile - Instantiations of the FoldingSet template implement
/// this function to gather data bits for the given node.
- virtual void GetNodeProfile(FoldingSetNodeID &ID, Node *N) const = 0;
+ virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0;
+ /// NodeEquals - Instantiations of the FoldingSet template implement
+ /// this function to compare the given node with the given ID.
+ virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID) const=0;
+ /// NodeEquals - Instantiations of the FoldingSet template implement
+ /// this function to compute a hash value for the given node.
+ virtual unsigned ComputeNodeHash(Node *N,
+ FoldingSetNodeID &TempID) const = 0;
};
//===----------------------------------------------------------------------===//
-/// FoldingSetTrait - This trait class is used to define behavior of how
-/// to "profile" (in the FoldingSet parlance) an object of a given type.
-/// The default behavior is to invoke a 'Profile' method on an object, but
-/// through template specialization the behavior can be tailored for specific
-/// types. Combined with the FoldingSetNodeWrapper classs, one can add objects
-/// to FoldingSets that were not originally designed to have that behavior.
+
+template<typename T> struct FoldingSetTrait;
+
+/// DefaultFoldingSetTrait - This class provides default implementations
+/// for FoldingSetTrait implementations.
///
-template<typename T> struct FoldingSetTrait {
- static inline void Profile(const T& X, FoldingSetNodeID& ID) { X.Profile(ID);}
- static inline void Profile(T& X, FoldingSetNodeID& ID) { X.Profile(ID); }
- template <typename Ctx>
- static inline void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) {
+template<typename T> struct DefaultFoldingSetTrait {
+ static void Profile(const T& X, FoldingSetNodeID& ID) {
+ X.Profile(ID);
+ }
+ static void Profile(T& X, FoldingSetNodeID& ID) {
+ X.Profile(ID);
+ }
+
+ // Equals - Test if the profile for X would match ID, using TempID
+ // to compute a temporary ID if necessary. The default implementation
+ // just calls Profile and does a regular comparison. Implementations
+ // can override this to provide more efficient implementations.
+ static inline bool Equals(T &X, const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID);
+
+ // ComputeHash - Compute a hash value for X, using TempID to
+ // compute a temporary ID if necessary. The default implementation
+ // just calls Profile and does a regular hash computation.
+ // Implementations can override this to provide more efficient
+ // implementations.
+ static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID);
+};
+
+/// FoldingSetTrait - This trait class is used to define behavior of how
+/// to "profile" (in the FoldingSet parlance) an object of a given type.
+/// The default behavior is to invoke a 'Profile' method on an object, but
+/// through template specialization the behavior can be tailored for specific
+/// types. Combined with the FoldingSetNodeWrapper class, one can add objects
+/// to FoldingSets that were not originally designed to have that behavior.
+template<typename T> struct FoldingSetTrait
+ : public DefaultFoldingSetTrait<T> {};
+
+template<typename T, typename Ctx> struct ContextualFoldingSetTrait;
+
+/// DefaultContextualFoldingSetTrait - Like DefaultFoldingSetTrait, but
+/// for ContextualFoldingSets.
+template<typename T, typename Ctx>
+struct DefaultContextualFoldingSetTrait {
+ static void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) {
X.Profile(ID, Context);
}
+ static inline bool Equals(T &X, const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID, Ctx Context);
+ static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID,
+ Ctx Context);
};
+/// ContextualFoldingSetTrait - Like FoldingSetTrait, but for
+/// ContextualFoldingSets.
+template<typename T, typename Ctx> struct ContextualFoldingSetTrait
+ : public DefaultContextualFoldingSetTrait<T, Ctx> {};
+
//===--------------------------------------------------------------------===//
/// FoldingSetNodeIDRef - This class describes a reference to an interned
/// FoldingSetNodeID, which can be a useful to store node id data rather
@@ -217,13 +267,19 @@ template<typename T> struct FoldingSetTrait {
/// is often much larger than necessary, and the possibility of heap
/// allocation means it requires a non-trivial destructor call.
class FoldingSetNodeIDRef {
- unsigned* Data;
+ const unsigned* Data;
size_t Size;
public:
FoldingSetNodeIDRef() : Data(0), Size(0) {}
- FoldingSetNodeIDRef(unsigned *D, size_t S) : Data(D), Size(S) {}
+ FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {}
+
+ /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
+ /// used to lookup the node in the FoldingSetImpl.
+ unsigned ComputeHash() const;
- unsigned *getData() const { return Data; }
+ bool operator==(FoldingSetNodeIDRef) const;
+
+ const unsigned *getData() const { return Data; }
size_t getSize() const { return Size; }
};
@@ -259,16 +315,17 @@ public:
inline void Add(const T& x) { FoldingSetTrait<T>::Profile(x, *this); }
/// clear - Clear the accumulated profile, allowing this FoldingSetNodeID
- /// object to be used to compute a new profile.
+ /// object to be used to compute a new profile.
inline void clear() { Bits.clear(); }
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used
- /// to lookup the node in the FoldingSetImpl.
+ /// to lookup the node in the FoldingSetImpl.
unsigned ComputeHash() const;
/// operator== - Used to compare two nodes to each other.
///
bool operator==(const FoldingSetNodeID &RHS) const;
+ bool operator==(const FoldingSetNodeIDRef RHS) const;
/// Intern - Copy this node's data to a memory region allocated from the
/// given allocator and return a FoldingSetNodeIDRef describing the
@@ -281,6 +338,39 @@ typedef FoldingSetImpl::Node FoldingSetNode;
template<class T> class FoldingSetIterator;
template<class T> class FoldingSetBucketIterator;
+// Definitions of FoldingSetTrait and ContextualFoldingSetTrait functions, which
+// require the definition of FoldingSetNodeID.
+template<typename T>
+inline bool
+DefaultFoldingSetTrait<T>::Equals(T &X, const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID) {
+ FoldingSetTrait<T>::Profile(X, TempID);
+ return TempID == ID;
+}
+template<typename T>
+inline unsigned
+DefaultFoldingSetTrait<T>::ComputeHash(T &X, FoldingSetNodeID &TempID) {
+ FoldingSetTrait<T>::Profile(X, TempID);
+ return TempID.ComputeHash();
+}
+template<typename T, typename Ctx>
+inline bool
+DefaultContextualFoldingSetTrait<T, Ctx>::Equals(T &X,
+ const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID,
+ Ctx Context) {
+ ContextualFoldingSetTrait<T, Ctx>::Profile(X, TempID, Context);
+ return TempID == ID;
+}
+template<typename T, typename Ctx>
+inline unsigned
+DefaultContextualFoldingSetTrait<T, Ctx>::ComputeHash(T &X,
+ FoldingSetNodeID &TempID,
+ Ctx Context) {
+ ContextualFoldingSetTrait<T, Ctx>::Profile(X, TempID, Context);
+ return TempID.ComputeHash();
+}
+
//===----------------------------------------------------------------------===//
/// FoldingSet - This template class is used to instantiate a specialized
/// implementation of the folding set to the node class T. T must be a
@@ -290,9 +380,23 @@ template<class T> class FoldingSet : public FoldingSetImpl {
private:
/// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a
/// way to convert nodes into a unique specifier.
- virtual void GetNodeProfile(FoldingSetNodeID &ID, Node *N) const {
+ virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const {
+ T *TN = static_cast<T *>(N);
+ FoldingSetTrait<T>::Profile(*TN, ID);
+ }
+ /// NodeEquals - Instantiations may optionally provide a way to compare a
+ /// node with a specified ID.
+ virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID) const {
T *TN = static_cast<T *>(N);
- FoldingSetTrait<T>::Profile(*TN,ID);
+ return FoldingSetTrait<T>::Equals(*TN, ID, TempID);
+ }
+ /// NodeEquals - Instantiations may optionally provide a way to compute a
+ /// hash value directly from a node.
+ virtual unsigned ComputeNodeHash(Node *N,
+ FoldingSetNodeID &TempID) const {
+ T *TN = static_cast<T *>(N);
+ return FoldingSetTrait<T>::ComputeHash(*TN, TempID);
}
public:
@@ -354,13 +458,21 @@ private:
/// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a
/// way to convert nodes into a unique specifier.
- virtual void GetNodeProfile(FoldingSetNodeID &ID,
- FoldingSetImpl::Node *N) const {
+ virtual void GetNodeProfile(FoldingSetImpl::Node *N,
+ FoldingSetNodeID &ID) const {
T *TN = static_cast<T *>(N);
-
- // We must use explicit template arguments in case Ctx is a
- // reference type.
- FoldingSetTrait<T>::template Profile<Ctx>(*TN, ID, Context);
+ ContextualFoldingSetTrait<T, Ctx>::Profile(*TN, ID, Context);
+ }
+ virtual bool NodeEquals(FoldingSetImpl::Node *N,
+ const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID) const {
+ T *TN = static_cast<T *>(N);
+ return ContextualFoldingSetTrait<T, Ctx>::Equals(*TN, ID, TempID, Context);
+ }
+ virtual unsigned ComputeNodeHash(FoldingSetImpl::Node *N,
+ FoldingSetNodeID &TempID) const {
+ T *TN = static_cast<T *>(N);
+ return ContextualFoldingSetTrait<T, Ctx>::ComputeHash(*TN, TempID, Context);
}
public:
@@ -447,8 +559,8 @@ public:
//===----------------------------------------------------------------------===//
/// FoldingSetBucketIteratorImpl - This is the common bucket iterator support
-/// shared by all folding sets, which knows how to walk a particular bucket
-/// of a folding set hash table.
+/// shared by all folding sets, which knows how to walk a particular bucket
+/// of a folding set hash table.
class FoldingSetBucketIteratorImpl {
protected:
@@ -549,7 +661,7 @@ class FastFoldingSetNode : public FoldingSetNode {
protected:
explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {}
public:
- void Profile(FoldingSetNodeID& ID) { ID = FastID; }
+ void Profile(FoldingSetNodeID& ID) const { ID = FastID; }
};
//===----------------------------------------------------------------------===//
@@ -559,9 +671,6 @@ template<typename T> struct FoldingSetTrait<T*> {
static inline void Profile(const T* X, FoldingSetNodeID& ID) {
ID.AddPointer(X);
}
- static inline void Profile(T* X, FoldingSetNodeID& ID) {
- ID.AddPointer(X);
- }
};
template<typename T> struct FoldingSetTrait<const T*> {
diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h
index 7aa315570f7c..968ce152779f 100644
--- a/include/llvm/ADT/ImmutableIntervalMap.h
+++ b/include/llvm/ADT/ImmutableIntervalMap.h
@@ -16,14 +16,14 @@ namespace llvm {
class Interval {
private:
- uint64_t Start;
- uint64_t End;
+ int64_t Start;
+ int64_t End;
public:
- Interval(uint64_t S, uint64_t E) : Start(S), End(E) {}
+ Interval(int64_t S, int64_t E) : Start(S), End(E) {}
- uint64_t getStart() const { return Start; }
- uint64_t getEnd() const { return End; }
+ int64_t getStart() const { return Start; }
+ int64_t getEnd() const { return End; }
};
template <typename T>
diff --git a/include/llvm/ADT/NullablePtr.h b/include/llvm/ADT/NullablePtr.h
new file mode 100644
index 000000000000..a9c47a138eca
--- /dev/null
+++ b/include/llvm/ADT/NullablePtr.h
@@ -0,0 +1,52 @@
+//===- llvm/ADT/NullablePtr.h - A pointer that allows null ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines and implements the NullablePtr class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_NULLABLE_PTR_H
+#define LLVM_ADT_NULLABLE_PTR_H
+
+#include <cassert>
+#include <cstddef>
+
+namespace llvm {
+/// NullablePtr pointer wrapper - NullablePtr is used for APIs where a
+/// potentially-null pointer gets passed around that must be explicitly handled
+/// in lots of places. By putting a wrapper around the null pointer, it makes
+/// it more likely that the null pointer case will be handled correctly.
+template<class T>
+class NullablePtr {
+ T *Ptr;
+public:
+ NullablePtr(T *P = 0) : Ptr(P) {}
+
+ bool isNull() const { return Ptr == 0; }
+ bool isNonNull() const { return Ptr != 0; }
+
+ /// get - Return the pointer if it is non-null.
+ const T *get() const {
+ assert(Ptr && "Pointer wasn't checked for null!");
+ return Ptr;
+ }
+
+ /// get - Return the pointer if it is non-null.
+ T *get() {
+ assert(Ptr && "Pointer wasn't checked for null!");
+ return Ptr;
+ }
+
+ T *getPtrOrNull() { return Ptr; }
+ const T *getPtrOrNull() const { return Ptr; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h
index 8dbf79031c35..0b0346be2cc5 100644
--- a/include/llvm/ADT/STLExtras.h
+++ b/include/llvm/ADT/STLExtras.h
@@ -225,7 +225,7 @@ inline T *array_endof(T (&x)[N]) {
/// Find the length of an array.
template<class T, std::size_t N>
-inline size_t array_lengthof(T (&x)[N]) {
+inline size_t array_lengthof(T (&)[N]) {
return N;
}
@@ -243,7 +243,7 @@ static inline int array_pod_sort_comparator(const void *P1, const void *P2) {
/// get_array_pad_sort_comparator - This is an internal helper function used to
/// get type deduction of T right.
template<typename T>
-static int (*get_array_pad_sort_comparator(const T &X))
+static int (*get_array_pad_sort_comparator(const T &))
(const void*, const void*) {
return array_pod_sort_comparator<T>;
}
diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h
index b5ca374a7ddd..c96ad19707f3 100644
--- a/include/llvm/ADT/ScopedHashTable.h
+++ b/include/llvm/ADT/ScopedHashTable.h
@@ -139,7 +139,12 @@ public:
}
V lookup(const K &Key) {
- return TopLevelMap[Key]->getValue();
+ typename DenseMap<K, ScopedHashTableVal<K, V, KInfo>*, KInfo>::iterator
+ I = TopLevelMap.find(Key);
+ if (I != TopLevelMap.end())
+ return I->second->getValue();
+
+ return V();
}
void insert(const K &Key, const V &Val) {
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index fa61d207bd30..1d6181a95da3 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -206,7 +206,7 @@ template <typename T, bool isPodLike>
void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
size_t CurCapacity = this->capacity();
size_t CurSize = this->size();
- size_t NewCapacity = 2*CurCapacity;
+ size_t NewCapacity = 2*CurCapacity + 1; // Always grow, even from zero.
if (NewCapacity < MinSize)
NewCapacity = MinSize;
T *NewElts = static_cast<T*>(malloc(NewCapacity*sizeof(T)));
@@ -707,6 +707,36 @@ public:
};
+/// Specialize SmallVector at N=0. This specialization guarantees
+/// that it can be instantiated at an incomplete T if none of its
+/// members are required.
+template <typename T>
+class SmallVector<T,0> : public SmallVectorImpl<T> {
+public:
+ SmallVector() : SmallVectorImpl<T>(0) {}
+
+ explicit SmallVector(unsigned Size, const T &Value = T())
+ : SmallVectorImpl<T>(0) {
+ this->reserve(Size);
+ while (Size--)
+ this->push_back(Value);
+ }
+
+ template<typename ItTy>
+ SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(0) {
+ this->append(S, E);
+ }
+
+ SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(0) {
+ SmallVectorImpl<T>::operator=(RHS);
+ }
+
+ SmallVector &operator=(const SmallVectorImpl<T> &RHS) {
+ return SmallVectorImpl<T>::operator=(RHS);
+ }
+
+};
+
} // End llvm namespace
namespace std {
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index 482193859b64..59ff6aa4f6aa 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -254,6 +254,10 @@ public:
StringMap() : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {}
explicit StringMap(unsigned InitialSize)
: StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {}
+
+ explicit StringMap(AllocatorTy A)
+ : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))), Allocator(A) {}
+
explicit StringMap(const StringMap &RHS)
: StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {
assert(RHS.empty() &&
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 33756f605f01..8386d3ee428b 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -149,7 +149,10 @@ namespace llvm {
unsigned edit_distance(StringRef Other, bool AllowReplacements = true);
/// str - Get the contents as an std::string.
- std::string str() const { return std::string(Data, Length); }
+ std::string str() const {
+ if (Data == 0) return std::string();
+ return std::string(Data, Length);
+ }
/// @}
/// @name Operator Overloads
@@ -228,12 +231,14 @@ namespace llvm {
/// find_first_of - Find the first character in the string that is \arg C,
/// or npos if not found. Same as find.
- size_type find_first_of(char C, size_t = 0) const { return find(C); }
+ size_type find_first_of(char C, size_t From = 0) const {
+ return find(C, From);
+ }
/// find_first_of - Find the first character in the string that is in \arg
/// Chars, or npos if not found.
///
- /// Note: O(size() * Chars.size())
+ /// Note: O(size() + Chars.size())
size_type find_first_of(StringRef Chars, size_t From = 0) const;
/// find_first_not_of - Find the first character in the string that is not
@@ -243,7 +248,7 @@ namespace llvm {
/// find_first_not_of - Find the first character in the string that is not
/// in the string \arg Chars, or npos if not found.
///
- /// Note: O(size() * Chars.size())
+ /// Note: O(size() + Chars.size())
size_type find_first_not_of(StringRef Chars, size_t From = 0) const;
/// @}
diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h
index 00048361e86f..9c55f6b70e36 100644
--- a/include/llvm/ADT/StringSet.h
+++ b/include/llvm/ADT/StringSet.h
@@ -15,7 +15,6 @@
#define LLVM_ADT_STRINGSET_H
#include "llvm/ADT/StringMap.h"
-#include <cassert>
namespace llvm {
@@ -26,10 +25,10 @@ namespace llvm {
class StringSet : public llvm::StringMap<char, AllocatorTy> {
typedef llvm::StringMap<char, AllocatorTy> base;
public:
- bool insert(const std::string& InLang) {
+ bool insert(StringRef InLang) {
assert(!InLang.empty());
- const char* KeyStart = &InLang[0];
- const char* KeyEnd = KeyStart + InLang.size();
+ const char *KeyStart = InLang.data();
+ const char *KeyEnd = KeyStart + InLang.size();
return base::insert(llvm::StringMapEntry<char>::
Create(KeyStart, KeyEnd, base::getAllocator(), '+'));
}
diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h
index 7dd5647df605..74805830d854 100644
--- a/include/llvm/ADT/StringSwitch.h
+++ b/include/llvm/ADT/StringSwitch.h
@@ -61,6 +61,26 @@ public:
return *this;
}
+ template<unsigned N>
+ StringSwitch& EndsWith(const char (&S)[N], const T &Value) {
+ if (!Result && Str.size() >= N-1 &&
+ std::memcmp(S, Str.data() + Str.size() + 1 - N, N-1) == 0) {
+ Result = &Value;
+ }
+
+ return *this;
+ }
+
+ template<unsigned N>
+ StringSwitch& StartsWith(const char (&S)[N], const T &Value) {
+ if (!Result && Str.size() >= N-1 &&
+ std::memcmp(S, Str.data(), N-1) == 0) {
+ Result = &Value;
+ }
+
+ return *this;
+ }
+
template<unsigned N0, unsigned N1>
StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
const T& Value) {
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index feade6a56fbd..8dca3c1cfb1b 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -24,7 +24,7 @@ class Twine;
/// Triple - Helper class for working with target triples.
///
-/// Target triples are strings in the format of:
+/// Target triples are strings in the canonical form:
/// ARCHITECTURE-VENDOR-OPERATING_SYSTEM
/// or
/// ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT
@@ -35,20 +35,11 @@ class Twine;
/// from the components of the target triple to well known IDs.
///
/// At its core the Triple class is designed to be a wrapper for a triple
-/// string; it does not normally change or normalize the triple string, instead
-/// it provides additional APIs to parse normalized parts out of the triple.
+/// string; the constructor does not change or normalize the triple string.
+/// Clients that need to handle the non-canonical triples that users often
+/// specify should use the normalize method.
///
-/// One curiosity this implies is that for some odd triples the results of,
-/// e.g., getOSName() can be very different from the result of getOS(). For
-/// example, for 'i386-mingw32', getOS() will return MinGW32, but since
-/// getOSName() is purely based on the string structure that will return the
-/// empty string.
-///
-/// Clients should generally avoid using getOSName() and related APIs unless
-/// they are familiar with the triple format (this is particularly true when
-/// rewriting a triple).
-///
-/// See autoconf/config.guess for a glimpse into what they look like in
+/// See autoconf/config.guess for a glimpse into what triples look like in
/// practice.
class Triple {
public:
@@ -117,6 +108,9 @@ private:
mutable OSType OS;
bool isInitialized() const { return Arch != InvalidArch; }
+ static ArchType ParseArch(StringRef ArchName);
+ static VendorType ParseVendor(StringRef VendorName);
+ static OSType ParseOS(StringRef OSName);
void Parse() const;
public:
@@ -134,6 +128,16 @@ public:
}
/// @}
+ /// @name Normalization
+ /// @{
+
+ /// normalize - Turn an arbitrary machine specification into the canonical
+ /// triple form (or something sensible that the Triple class understands if
+ /// nothing better can reasonably be done). In particular, it handles the
+ /// common case in which otherwise valid components are in the wrong order.
+ static std::string normalize(StringRef Str);
+
+ /// @}
/// @name Typed Component Access
/// @{
diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h
index 9e30bd4e6f73..ded17fc32223 100644
--- a/include/llvm/ADT/ValueMap.h
+++ b/include/llvm/ADT/ValueMap.h
@@ -82,13 +82,13 @@ class ValueMap {
typedef typename Config::ExtraData ExtraData;
MapT Map;
ExtraData Data;
+ ValueMap(const ValueMap&); // DO NOT IMPLEMENT
+ ValueMap& operator=(const ValueMap&); // DO NOT IMPLEMENT
public:
typedef KeyT key_type;
typedef ValueT mapped_type;
typedef std::pair<KeyT, ValueT> value_type;
- ValueMap(const ValueMap& Other) : Map(Other.Map), Data(Other.Data) {}
-
explicit ValueMap(unsigned NumInitBuckets = 64)
: Map(NumInitBuckets), Data() {}
explicit ValueMap(const ExtraData &Data, unsigned NumInitBuckets = 64)
@@ -149,7 +149,7 @@ public:
bool erase(const KeyT &Val) {
return Map.erase(Wrap(Val));
}
- bool erase(iterator I) {
+ void erase(iterator I) {
return Map.erase(I.base());
}
@@ -161,12 +161,6 @@ public:
return Map[Wrap(Key)];
}
- ValueMap& operator=(const ValueMap& Other) {
- Map = Other.Map;
- Data = Other.Data;
- return *this;
- }
-
/// isPointerIntoBucketsArray - Return true if the specified pointer points
/// somewhere into the ValueMap's array of buckets (i.e. either to a key or
/// value in the ValueMap).
@@ -250,12 +244,6 @@ public:
}
};
-
-template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
-struct isPodLike<ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> > {
- static const bool value = true;
-};
-
template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
struct DenseMapInfo<ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> > {
typedef ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> VH;
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index 9479d00f31be..4e3afe171199 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -614,7 +614,6 @@ public:
template<class Pr3> void sort(Pr3 pred);
void sort() { sort(op_less); }
- void reverse();
};
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index e611a35fc983..ad68d48e531b 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -18,12 +18,9 @@
//
// This API represents memory as a (Pointer, Size) pair. The Pointer component
// specifies the base memory address of the region, the Size specifies how large
-// of an area is being queried. If Size is 0, two pointers only alias if they
-// are exactly equal. If size is greater than zero, but small, the two pointers
-// alias if the areas pointed to overlap. If the size is very large (ie, ~0U),
-// then the two pointers alias if they may be pointing to components of the same
-// memory object. Pointers that point to two completely different objects in
-// memory never alias, regardless of the value of the Size component.
+// of an area is being queried, or UnknownSize if the size is not known.
+// Pointers that point to two completely different objects in memory never
+// alias, regardless of the value of the Size component.
//
//===----------------------------------------------------------------------===//
@@ -46,8 +43,11 @@ class AnalysisUsage;
class AliasAnalysis {
protected:
const TargetData *TD;
+
+private:
AliasAnalysis *AA; // Previous Alias Analysis to chain to.
+protected:
/// InitializeAliasAnalysis - Subclasses must call this method to initialize
/// the AliasAnalysis interface before any other methods are called. This is
/// typically called by the run* methods of these subclasses. This may be
@@ -64,6 +64,11 @@ public:
AliasAnalysis() : TD(0), AA(0) {}
virtual ~AliasAnalysis(); // We want to be subclassed
+ /// UnknownSize - This is a special value which can be used with the
+ /// size arguments in alias queries to indicate that the caller does not
+ /// know the sizes of the potential memory references.
+ static unsigned const UnknownSize = ~0u;
+
/// getTargetData - Return a pointer to the current TargetData object, or
/// null if no TargetData object is available.
///
@@ -84,6 +89,9 @@ public:
/// if (AA.alias(P1, P2)) { ... }
/// to check to see if two pointers might alias.
///
+ /// See docs/AliasAnalysis.html for more information on the specific meanings
+ /// of these values.
+ ///
enum AliasResult { NoAlias = 0, MayAlias = 1, MustAlias = 2 };
/// alias - The main low level interface to the alias analysis implementation.
@@ -94,6 +102,11 @@ public:
virtual AliasResult alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
+ /// alias - A convenience wrapper for the case where the sizes are unknown.
+ AliasResult alias(const Value *V1, const Value *V2) {
+ return alias(V1, UnknownSize, V2, UnknownSize);
+ }
+
/// isNoAlias - A trivial helper function to check to see if the specified
/// pointers are no-alias.
bool isNoAlias(const Value *V1, unsigned V1Size,
@@ -130,17 +143,11 @@ public:
// AccessesArguments - This function accesses function arguments in well
// known (possibly volatile) ways, but does not access any other memory.
- //
- // Clients may use the Info parameter of getModRefBehavior to get specific
- // information about how pointer arguments are used.
AccessesArguments,
// AccessesArgumentsAndGlobals - This function has accesses function
// arguments and global variables well known (possibly volatile) ways, but
// does not access any other memory.
- //
- // Clients may use the Info parameter of getModRefBehavior to get specific
- // information about how pointer arguments are used.
AccessesArgumentsAndGlobals,
// OnlyReadsMemory - This function does not perform any non-local stores or
@@ -154,31 +161,17 @@ public:
UnknownModRefBehavior
};
- /// PointerAccessInfo - This struct is used to return results for pointers,
- /// globals, and the return value of a function.
- struct PointerAccessInfo {
- /// V - The value this record corresponds to. This may be an Argument for
- /// the function, a GlobalVariable, or null, corresponding to the return
- /// value for the function.
- Value *V;
-
- /// ModRefInfo - Whether the pointer is loaded or stored to/from.
- ///
- ModRefResult ModRefInfo;
- };
-
/// getModRefBehavior - Return the behavior when calling the given call site.
- virtual ModRefBehavior getModRefBehavior(CallSite CS,
- std::vector<PointerAccessInfo> *Info = 0);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
/// getModRefBehavior - Return the behavior when calling the given function.
/// For use when the call site is not known.
- virtual ModRefBehavior getModRefBehavior(Function *F,
- std::vector<PointerAccessInfo> *Info = 0);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
- /// getModRefBehavior - Return the modref behavior of the intrinsic with the
- /// given id.
- static ModRefBehavior getModRefBehavior(unsigned iid);
+ /// getIntrinsicModRefBehavior - Return the modref behavior of the intrinsic
+ /// with the given id. Most clients won't need this, because the regular
+ /// getModRefBehavior incorporates this information.
+ static ModRefBehavior getIntrinsicModRefBehavior(unsigned iid);
/// doesNotAccessMemory - If the specified call is known to never read or
/// write memory, return true. If the call only reads from known-constant
@@ -191,14 +184,14 @@ public:
///
/// This property corresponds to the GCC 'const' attribute.
///
- bool doesNotAccessMemory(CallSite CS) {
+ bool doesNotAccessMemory(ImmutableCallSite CS) {
return getModRefBehavior(CS) == DoesNotAccessMemory;
}
/// doesNotAccessMemory - If the specified function is known to never read or
/// write memory, return true. For use when the call site is not known.
///
- bool doesNotAccessMemory(Function *F) {
+ bool doesNotAccessMemory(const Function *F) {
return getModRefBehavior(F) == DoesNotAccessMemory;
}
@@ -211,7 +204,7 @@ public:
///
/// This property corresponds to the GCC 'pure' attribute.
///
- bool onlyReadsMemory(CallSite CS) {
+ bool onlyReadsMemory(ImmutableCallSite CS) {
ModRefBehavior MRB = getModRefBehavior(CS);
return MRB == DoesNotAccessMemory || MRB == OnlyReadsMemory;
}
@@ -220,7 +213,7 @@ public:
/// non-volatile memory (or not access memory at all), return true. For use
/// when the call site is not known.
///
- bool onlyReadsMemory(Function *F) {
+ bool onlyReadsMemory(const Function *F) {
ModRefBehavior MRB = getModRefBehavior(F);
return MRB == DoesNotAccessMemory || MRB == OnlyReadsMemory;
}
@@ -234,36 +227,36 @@ public:
/// a particular call site modifies or reads the memory specified by the
/// pointer.
///
- virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
/// getModRefInfo - Return information about whether two call sites may refer
- /// to the same set of memory locations. This function returns NoModRef if
- /// the two calls refer to disjoint memory locations, Ref if CS1 reads memory
- /// written by CS2, Mod if CS1 writes to memory read or written by CS2, or
- /// ModRef if CS1 might read or write memory accessed by CS2.
- ///
- virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
+ /// to the same set of memory locations. See
+ /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
+ /// for details.
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
public:
/// Convenience functions...
- ModRefResult getModRefInfo(LoadInst *L, Value *P, unsigned Size);
- ModRefResult getModRefInfo(StoreInst *S, Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallInst *C, Value *P, unsigned Size) {
- return getModRefInfo(CallSite(C), P, Size);
- }
- ModRefResult getModRefInfo(InvokeInst *I, Value *P, unsigned Size) {
- return getModRefInfo(CallSite(I), P, Size);
+ ModRefResult getModRefInfo(const LoadInst *L, const Value *P, unsigned Size);
+ ModRefResult getModRefInfo(const StoreInst *S, const Value *P, unsigned Size);
+ ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, unsigned Size);
+ ModRefResult getModRefInfo(const CallInst *C, const Value *P, unsigned Size) {
+ return getModRefInfo(ImmutableCallSite(C), P, Size);
}
- ModRefResult getModRefInfo(VAArgInst* I, Value* P, unsigned Size) {
- return AliasAnalysis::ModRef;
+ ModRefResult getModRefInfo(const InvokeInst *I,
+ const Value *P, unsigned Size) {
+ return getModRefInfo(ImmutableCallSite(I), P, Size);
}
- ModRefResult getModRefInfo(Instruction *I, Value *P, unsigned Size) {
+ ModRefResult getModRefInfo(const Instruction *I,
+ const Value *P, unsigned Size) {
switch (I->getOpcode()) {
- case Instruction::VAArg: return getModRefInfo((VAArgInst*)I, P, Size);
- case Instruction::Load: return getModRefInfo((LoadInst*)I, P, Size);
- case Instruction::Store: return getModRefInfo((StoreInst*)I, P, Size);
- case Instruction::Call: return getModRefInfo((CallInst*)I, P, Size);
- case Instruction::Invoke: return getModRefInfo((InvokeInst*)I, P, Size);
+ case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, P,Size);
+ case Instruction::Load: return getModRefInfo((const LoadInst*)I, P, Size);
+ case Instruction::Store: return getModRefInfo((const StoreInst*)I, P,Size);
+ case Instruction::Call: return getModRefInfo((const CallInst*)I, P, Size);
+ case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,P,Size);
default: return NoModRef;
}
}
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 09f12ad281ac..8e2f7fd29a31 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -92,7 +92,8 @@ class AliasSet : public ilist_node<AliasSet> {
AliasSet *Forward; // Forwarding pointer.
AliasSet *Next, *Prev; // Doubly linked list of AliasSets.
- std::vector<CallSite> CallSites; // All calls & invokes in this alias set.
+ // All calls & invokes in this alias set.
+ std::vector<AssertingVH<Instruction> > CallSites;
// RefCount - Number of nodes pointing to this AliasSet plus the number of
// AliasSets forwarding to it.
@@ -127,6 +128,11 @@ class AliasSet : public ilist_node<AliasSet> {
removeFromTracker(AST);
}
+ CallSite getCallSite(unsigned i) const {
+ assert(i < CallSites.size());
+ return CallSite(CallSites[i]);
+ }
+
public:
/// Accessors...
bool isRef() const { return AccessTy & Refs; }
@@ -229,7 +235,7 @@ private:
void addCallSite(CallSite CS, AliasAnalysis &AA);
void removeCallSite(CallSite CS) {
for (size_t i = 0, e = CallSites.size(); i != e; ++i)
- if (CallSites[i].getInstruction() == CS.getInstruction()) {
+ if (CallSites[i] == CS.getInstruction()) {
CallSites[i] = CallSites.back();
CallSites.pop_back();
}
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index 4828eba5b528..d8daf5196fca 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -22,7 +22,7 @@ template <class Analysis, bool Simple>
struct DOTGraphTraitsViewer : public FunctionPass {
std::string Name;
- DOTGraphTraitsViewer(std::string GraphName, const void *ID) : FunctionPass(ID) {
+ DOTGraphTraitsViewer(std::string GraphName, char &ID) : FunctionPass(ID) {
Name = GraphName;
}
@@ -48,7 +48,7 @@ struct DOTGraphTraitsPrinter : public FunctionPass {
std::string Name;
- DOTGraphTraitsPrinter(std::string GraphName, const void *ID)
+ DOTGraphTraitsPrinter(std::string GraphName, char &ID)
: FunctionPass(ID) {
Name = GraphName;
}
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index a85b6bc76967..2d1418da64d8 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -36,6 +36,12 @@ namespace llvm {
class LLVMContext;
class raw_ostream;
+ class DIFile;
+ class DISubprogram;
+ class DILexicalBlock;
+ class DIVariable;
+ class DIType;
+
/// DIDescriptor - A thin wraper around MDNode to access encoded debug info.
/// This should not be stored in a container, because underly MDNode may
/// change in certain situations.
@@ -56,11 +62,17 @@ namespace llvm {
}
GlobalVariable *getGlobalVariableField(unsigned Elt) const;
+ Constant *getConstantField(unsigned Elt) const;
Function *getFunctionField(unsigned Elt) const;
public:
explicit DIDescriptor() : DbgNode(0) {}
explicit DIDescriptor(const MDNode *N) : DbgNode(N) {}
+ explicit DIDescriptor(const DIFile F);
+ explicit DIDescriptor(const DISubprogram F);
+ explicit DIDescriptor(const DILexicalBlock F);
+ explicit DIDescriptor(const DIVariable F);
+ explicit DIDescriptor(const DIType F);
bool Verify() const { return DbgNode != 0; }
@@ -134,7 +146,7 @@ namespace llvm {
public:
explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {}
- unsigned getLanguage() const { return getUnsignedField(2); }
+ unsigned getLanguage() const { return getUnsignedField(2); }
StringRef getFilename() const { return getStringField(3); }
StringRef getDirectory() const { return getStringField(4); }
StringRef getProducer() const { return getStringField(5); }
@@ -260,6 +272,10 @@ namespace llvm {
StringRef getFilename() const { return getCompileUnit().getFilename();}
StringRef getDirectory() const { return getCompileUnit().getDirectory();}
+ /// replaceAllUsesWith - Replace all uses of debug info referenced by
+ /// this descriptor.
+ void replaceAllUsesWith(DIDescriptor &D);
+
/// print - print type.
void print(raw_ostream &OS) const;
@@ -274,6 +290,9 @@ namespace llvm {
unsigned getEncoding() const { return getUnsignedField(9); }
+ /// Verify - Verify that a basic type descriptor is well formed.
+ bool Verify() const;
+
/// print - print basic type.
void print(raw_ostream &OS) const;
@@ -297,16 +316,14 @@ namespace llvm {
/// return base type size.
uint64_t getOriginalTypeSize() const;
+ /// Verify - Verify that a derived type descriptor is well formed.
+ bool Verify() const;
+
/// print - print derived type.
void print(raw_ostream &OS) const;
/// dump - print derived type to dbgs() with a newline.
void dump() const;
-
- /// replaceAllUsesWith - Replace all uses of debug info referenced by
- /// this descriptor. After this completes, the current debug info value
- /// is erased.
- void replaceAllUsesWith(DIDescriptor &D);
};
/// DICompositeType - This descriptor holds a type that can refer to multiple
@@ -437,6 +454,7 @@ namespace llvm {
unsigned isDefinition() const { return getUnsignedField(10); }
GlobalVariable *getGlobal() const { return getGlobalVariableField(11); }
+ Constant *getConstant() const { return getConstantField(11); }
/// Verify - Verify that a global variable descriptor is well formed.
bool Verify() const;
@@ -504,10 +522,18 @@ namespace llvm {
public:
explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {}
DIScope getContext() const { return getFieldAs<DIScope>(1); }
- StringRef getDirectory() const { return getContext().getDirectory(); }
- StringRef getFilename() const { return getContext().getFilename(); }
unsigned getLineNumber() const { return getUnsignedField(2); }
unsigned getColumnNumber() const { return getUnsignedField(3); }
+ StringRef getDirectory() const {
+ DIFile F = getFieldAs<DIFile>(4);
+ StringRef dir = F.getDirectory();
+ return !dir.empty() ? dir : getContext().getDirectory();
+ }
+ StringRef getFilename() const {
+ DIFile F = getFieldAs<DIFile>(4);
+ StringRef filename = F.getFilename();
+ return !filename.empty() ? filename : getContext().getFilename();
+ }
};
/// DINameSpace - A wrapper for a C++ style name space.
@@ -634,6 +660,9 @@ namespace llvm {
unsigned RunTimeLang = 0,
MDNode *ContainingType = 0);
+ /// CreateTemporaryType - Create a temporary forward-declared type.
+ DIType CreateTemporaryType();
+
/// CreateArtificialType - Create a new DIType with "artificial" flag set.
DIType CreateArtificialType(DIType Ty);
@@ -648,7 +677,8 @@ namespace llvm {
unsigned Flags,
DIType DerivedFrom,
DIArray Elements,
- unsigned RunTimeLang = 0);
+ unsigned RunTimeLang = 0,
+ MDNode *ContainingType = 0);
/// CreateSubprogram - Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram for descriptions of these fields.
@@ -678,6 +708,15 @@ namespace llvm {
unsigned LineNo, DIType Ty, bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *GV);
+ /// CreateGlobalVariable - Create a new descriptor for the specified constant.
+ DIGlobalVariable
+ CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty, bool isLocalToUnit,
+ bool isDefinition, llvm::Constant *C);
+
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable CreateVariable(unsigned Tag, DIDescriptor Context,
StringRef Name,
@@ -694,8 +733,8 @@ namespace llvm {
/// CreateLexicalBlock - This creates a descriptor for a lexical block
/// with the specified parent context.
- DILexicalBlock CreateLexicalBlock(DIDescriptor Context, unsigned Line = 0,
- unsigned Col = 0);
+ DILexicalBlock CreateLexicalBlock(DIDescriptor Context, DIFile F,
+ unsigned Line = 0, unsigned Col = 0);
/// CreateNameSpace - This creates new descriptor for a namespace
/// with the specified parent context.
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 1979d3f6820b..73c6e6286b5b 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -702,7 +702,7 @@ public:
static char ID; // Pass ID, replacement for typeid
DominatorTreeBase<BasicBlock>* DT;
- DominatorTree() : FunctionPass(&ID) {
+ DominatorTree() : FunctionPass(ID) {
DT = new DominatorTreeBase<BasicBlock>(false);
}
@@ -890,7 +890,7 @@ protected:
const bool IsPostDominators;
public:
- DominanceFrontierBase(void *ID, bool isPostDom)
+ DominanceFrontierBase(char &ID, bool isPostDom)
: FunctionPass(ID), IsPostDominators(isPostDom) {}
/// getRoots - Return the root blocks of the current CFG. This may include
@@ -995,6 +995,9 @@ public:
/// print - Convert to human readable form
///
virtual void print(raw_ostream &OS, const Module* = 0) const;
+
+ /// dump - Dump the dominance frontier to dbgs().
+ void dump() const;
};
@@ -1006,7 +1009,7 @@ class DominanceFrontier : public DominanceFrontierBase {
public:
static char ID; // Pass ID, replacement for typeid
DominanceFrontier() :
- DominanceFrontierBase(&ID, false) {}
+ DominanceFrontierBase(ID, false) {}
BasicBlock *getRoot() const {
assert(Roots.size() == 1 && "Should always have entry node!");
diff --git a/include/llvm/Analysis/FindUsedTypes.h b/include/llvm/Analysis/FindUsedTypes.h
index 1337385848ed..8a78eb624973 100644
--- a/include/llvm/Analysis/FindUsedTypes.h
+++ b/include/llvm/Analysis/FindUsedTypes.h
@@ -26,7 +26,7 @@ class FindUsedTypes : public ModulePass {
std::set<const Type *> UsedTypes;
public:
static char ID; // Pass identification, replacement for typeid
- FindUsedTypes() : ModulePass(&ID) {}
+ FindUsedTypes() : ModulePass(ID) {}
/// getTypes - After the pass has been run, return the set containing all of
/// the types used in the module.
diff --git a/include/llvm/Analysis/IntervalPartition.h b/include/llvm/Analysis/IntervalPartition.h
index c1214e7427a4..75a5cdf1f99d 100644
--- a/include/llvm/Analysis/IntervalPartition.h
+++ b/include/llvm/Analysis/IntervalPartition.h
@@ -48,7 +48,7 @@ class IntervalPartition : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- IntervalPartition() : FunctionPass(&ID), RootInterval(0) {}
+ IntervalPartition() : FunctionPass(ID), RootInterval(0) {}
// run - Calculate the interval partition for this function
virtual bool runOnFunction(Function &F);
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index 566788daa616..b2a3afbc986a 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LIVEVALUES_H
-#define LLVM_ANALYSIS_LIVEVALUES_H
+#ifndef LLVM_ANALYSIS_LAZYVALUEINFO_H
+#define LLVM_ANALYSIS_LAZYVALUEINFO_H
#include "llvm/Pass.h"
@@ -31,7 +31,7 @@ class LazyValueInfo : public FunctionPass {
void operator=(const LazyValueInfo&); // DO NOT IMPLEMENT.
public:
static char ID;
- LazyValueInfo() : FunctionPass(&ID), PImpl(0) {}
+ LazyValueInfo() : FunctionPass(ID), PImpl(0) {}
~LazyValueInfo() { assert(PImpl == 0 && "releaseMemory not called"); }
/// Tristate - This is used to return true/false/dunno results.
@@ -57,6 +57,12 @@ public:
/// constant on the specified edge. Return null if not.
Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB);
+ /// threadEdge - Inform the analysis cache that we have threaded an edge from
+ /// PredBB to OldSucc to be from PredBB to NewSucc instead.
+ void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc);
+
+ /// eraseBlock - Inform the analysis cache that we have erased a block.
+ void eraseBlock(BasicBlock *BB);
// Implementation boilerplate.
diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h
index 01f108d29042..c9adf3f36ad7 100644
--- a/include/llvm/Analysis/LibCallAliasAnalysis.h
+++ b/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -28,18 +28,20 @@ namespace llvm {
LibCallInfo *LCI;
explicit LibCallAliasAnalysis(LibCallInfo *LC = 0)
- : FunctionPass(&ID), LCI(LC) {
+ : FunctionPass(ID), LCI(LC) {
}
- explicit LibCallAliasAnalysis(const void *ID, LibCallInfo *LC)
+ explicit LibCallAliasAnalysis(char &ID, LibCallInfo *LC)
: FunctionPass(ID), LCI(LC) {
}
~LibCallAliasAnalysis();
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
// TODO: Could compare two direct calls against each other if we cared to.
- return AliasAnalysis::getModRefInfo(CS1,CS2);
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -49,9 +51,20 @@ namespace llvm {
return false;
}
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
private:
ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
- CallSite CS, Value *P, unsigned Size);
+ ImmutableCallSite CS,
+ const Value *P, unsigned Size);
};
} // End of llvm namespace
diff --git a/include/llvm/Analysis/LibCallSemantics.h b/include/llvm/Analysis/LibCallSemantics.h
index 74e8401a1fe6..31d7cc56ce53 100644
--- a/include/llvm/Analysis/LibCallSemantics.h
+++ b/include/llvm/Analysis/LibCallSemantics.h
@@ -47,7 +47,8 @@ namespace llvm {
enum LocResult {
Yes, No, Unknown
};
- LocResult (*isLocation)(CallSite CS, const Value *Ptr, unsigned Size);
+ LocResult (*isLocation)(ImmutableCallSite CS,
+ const Value *Ptr, unsigned Size);
};
/// LibCallFunctionInfo - Each record in the array of FunctionInfo structs
@@ -142,7 +143,7 @@ namespace llvm {
/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
/// the specified function if we have it. If not, return null.
- const LibCallFunctionInfo *getFunctionInfo(Function *F) const;
+ const LibCallFunctionInfo *getFunctionInfo(const Function *F) const;
//===------------------------------------------------------------------===//
diff --git a/include/llvm/Analysis/LoopDependenceAnalysis.h b/include/llvm/Analysis/LoopDependenceAnalysis.h
index a1a563796f5a..94fd9907090d 100644
--- a/include/llvm/Analysis/LoopDependenceAnalysis.h
+++ b/include/llvm/Analysis/LoopDependenceAnalysis.h
@@ -91,7 +91,7 @@ class LoopDependenceAnalysis : public LoopPass {
public:
static char ID; // Class identification, replacement for typeinfo
- LoopDependenceAnalysis() : LoopPass(&ID) {}
+ LoopDependenceAnalysis() : LoopPass(ID) {}
/// isDependencePair - Check whether two values can possibly give rise to
/// a data dependence: that is the case if both are instructions accessing
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 9455fd840326..462620f7e3cb 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -35,6 +35,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/raw_ostream.h"
@@ -229,13 +230,16 @@ public:
return 0;
}
+ /// Edge type.
+ typedef std::pair<BlockT*, BlockT*> Edge;
+
/// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
- typedef std::pair<const BlockT*,const BlockT*> Edge;
- void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const {
+ template <typename EdgeT>
+ void getExitEdges(SmallVectorImpl<EdgeT> &ExitEdges) const {
// Sort the blocks vector so that we can use binary search to do quick
// lookups.
SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end());
- std::sort(LoopBBs.begin(), LoopBBs.end());
+ array_pod_sort(LoopBBs.begin(), LoopBBs.end());
typedef GraphTraits<BlockT*> BlockTraits;
for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
@@ -244,7 +248,7 @@ public:
I != E; ++I)
if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
// Not in current loop? It must be an exit block.
- ExitEdges.push_back(std::make_pair(*BI, *I));
+ ExitEdges.push_back(EdgeT(*BI, *I));
}
/// getLoopPreheader - If there is a preheader for this loop, return it. A
@@ -505,6 +509,12 @@ protected:
}
};
+template<class BlockT, class LoopT>
+raw_ostream& operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
+ Loop.print(OS);
+ return OS;
+}
+
class Loop : public LoopBase<BasicBlock, Loop> {
public:
Loop() {}
@@ -552,12 +562,6 @@ public:
///
PHINode *getCanonicalInductionVariable() const;
- /// getCanonicalInductionVariableIncrement - Return the LLVM value that holds
- /// the canonical induction variable value for the "next" iteration of the
- /// loop. This always succeeds if getCanonicalInductionVariable succeeds.
- ///
- Instruction *getCanonicalInductionVariableIncrement() const;
-
/// getTripCount - Return a loop-invariant LLVM value indicating the number of
/// times the loop will be executed. Note that this means that the backedge
/// of the loop executes N-1 times. If the trip-count cannot be determined,
@@ -936,7 +940,7 @@ class LoopInfo : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LoopInfo() : FunctionPass(&ID) {}
+ LoopInfo() : FunctionPass(ID) {}
LoopInfoBase<BasicBlock, Loop>& getBase() { return LI; }
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index 6f77d019b691..1603d2ea7a4f 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -19,6 +19,7 @@
#include "llvm/Pass.h"
#include "llvm/PassManagers.h"
#include "llvm/Function.h"
+#include <deque>
namespace llvm {
@@ -28,8 +29,7 @@ class PMStack;
class LoopPass : public Pass {
public:
- explicit LoopPass(intptr_t pid) : Pass(PT_Loop, pid) {}
- explicit LoopPass(void *pid) : Pass(PT_Loop, pid) {}
+ explicit LoopPass(char &pid) : Pass(PT_Loop, pid) {}
/// getPrinterPass - Get a pass to print the function corresponding
/// to a Loop.
@@ -58,7 +58,7 @@ public:
/// Assign pass manager to manage this pass
virtual void assignPassManager(PMStack &PMS,
- PassManagerType PMT = PMT_LoopPassManager);
+ PassManagerType PMT);
/// Return what kind of Pass Manager can manage this pass.
virtual PassManagerType getPotentialPassManagerType() const {
@@ -104,10 +104,10 @@ public:
/// Print passes managed by this manager
void dumpPassStructure(unsigned Offset);
- Pass *getContainedPass(unsigned N) {
+ LoopPass *getContainedPass(unsigned N) {
assert(N < PassVector.size() && "Pass number out of range!");
- Pass *FP = static_cast<Pass *>(PassVector[N]);
- return FP;
+ LoopPass *LP = static_cast<LoopPass *>(PassVector[N]);
+ return LP;
}
virtual PassManagerType getPassManagerType() const {
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index ce3f7a6677a5..37425ebe8358 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -81,11 +81,18 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
+ // createTypeBasedAliasAnalysisPass - This pass implements metadata-based
+ // type-based alias analysis.
+ //
+ ImmutablePass *createTypeBasedAliasAnalysisPass();
+
+ //===--------------------------------------------------------------------===//
+ //
// createProfileLoaderPass - This pass loads information from a profile dump
// file.
//
ModulePass *createProfileLoaderPass();
- extern const PassInfo *ProfileLoaderPassID;
+ extern char &ProfileLoaderPassID;
//===--------------------------------------------------------------------===//
//
@@ -99,7 +106,7 @@ namespace llvm {
// instead of loading it from a previous run.
//
FunctionPass *createProfileEstimatorPass();
- extern const PassInfo *ProfileEstimatorPassID;
+ extern char &ProfileEstimatorPassID;
//===--------------------------------------------------------------------===//
//
@@ -154,6 +161,13 @@ namespace llvm {
// print debug info intrinsics in human readable form
FunctionPass *createDbgInfoPrinterPass();
+ //===--------------------------------------------------------------------===//
+ //
+ // createRegionInfoPass - This pass finds all single entry single exit regions
+ // in a function and builds the region hierarchy.
+ //
+ FunctionPass *createRegionInfoPass();
+
// Print module-level debug info metadata in human-readable form.
ModulePass *createModuleDebugInfoPrinterPass();
}
diff --git a/include/llvm/Analysis/PointerTracking.h b/include/llvm/Analysis/PointerTracking.h
index 6c4f838430b6..6b49e18c1b63 100644
--- a/include/llvm/Analysis/PointerTracking.h
+++ b/include/llvm/Analysis/PointerTracking.h
@@ -98,6 +98,7 @@ namespace llvm {
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
void print(raw_ostream &OS, const Module* = 0) const;
+ Value *computeAllocationCountValue(Value *P, const Type *&Ty) const;
private:
Function *FF;
TargetData *TD;
diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h
index 5552017d9180..46ce8200f966 100644
--- a/include/llvm/Analysis/PostDominators.h
+++ b/include/llvm/Analysis/PostDominators.h
@@ -25,7 +25,7 @@ struct PostDominatorTree : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
DominatorTreeBase<BasicBlock>* DT;
- PostDominatorTree() : FunctionPass(&ID) {
+ PostDominatorTree() : FunctionPass(ID) {
DT = new DominatorTreeBase<BasicBlock>(true);
}
@@ -106,7 +106,7 @@ template <> struct GraphTraits<PostDominatorTree*>
struct PostDominanceFrontier : public DominanceFrontierBase {
static char ID;
PostDominanceFrontier()
- : DominanceFrontierBase(&ID, true) {}
+ : DominanceFrontierBase(ID, true) {}
virtual bool runOnFunction(Function &) {
Frontiers.clear();
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
new file mode 100644
index 000000000000..7a2670f2c08c
--- /dev/null
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -0,0 +1,630 @@
+//===- RegionInfo.h - SESE region analysis ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Calculate a program structure tree built out of single entry single exit
+// regions.
+// The basic ideas are taken from "The Program Structure Tree - Richard Johnson,
+// David Pearson, Keshav Pingali - 1994", however enriched with ideas from "The
+// Refined Process Structure Tree - Jussi Vanhatalo, Hagen Voelyer, Jana
+// Koehler - 2009".
+// The algorithm to calculate these data structures however is completely
+// different, as it takes advantage of existing information already available
+// in (Post)dominace tree and dominance frontier passes. This leads to a simpler
+// and in practice hopefully better performing algorithm. The runtime of the
+// algorithms described in the papers above are both linear in graph size,
+// O(V+E), whereas this algorithm is not, as the dominance frontier information
+// itself is not, but in practice runtime seems to be in the order of magnitude
+// of dominance tree calculation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_REGION_INFO_H
+#define LLVM_ANALYSIS_REGION_INFO_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+class Region;
+class RegionInfo;
+class raw_ostream;
+class Loop;
+class LoopInfo;
+
+/// @brief Marker class to iterate over the elements of a Region in flat mode.
+///
+/// The class is used to either iterate in Flat mode or by not using it to not
+/// iterate in Flat mode. During a Flat mode iteration all Regions are entered
+/// and the iteration returns every BasicBlock. If the Flat mode is not
+/// selected for SubRegions just one RegionNode containing the subregion is
+/// returned.
+template <class GraphType>
+class FlatIt {};
+
+/// @brief A RegionNode represents a subregion or a BasicBlock that is part of a
+/// Region.
+class RegionNode {
+ // DO NOT IMPLEMENT
+ RegionNode(const RegionNode &);
+ // DO NOT IMPLEMENT
+ const RegionNode &operator=(const RegionNode &);
+
+ /// This is the entry basic block that starts this region node. If this is a
+ /// BasicBlock RegionNode, then entry is just the basic block, that this
+ /// RegionNode represents. Otherwise it is the entry of this (Sub)RegionNode.
+ ///
+ /// In the BBtoRegionNode map of the parent of this node, BB will always map
+ /// to this node no matter which kind of node this one is.
+ ///
+ /// The node can hold either a Region or a BasicBlock.
+ /// Use one bit to save, if this RegionNode is a subregion or BasicBlock
+ /// RegionNode.
+ PointerIntPair<BasicBlock*, 1, bool> entry;
+
+protected:
+ /// @brief The parent Region of this RegionNode.
+ /// @see getParent()
+ Region* parent;
+
+public:
+ /// @brief Create a RegionNode.
+ ///
+ /// @param Parent The parent of this RegionNode.
+ /// @param Entry The entry BasicBlock of the RegionNode. If this
+ /// RegionNode represents a BasicBlock, this is the
+ /// BasicBlock itself. If it represents a subregion, this
+ /// is the entry BasicBlock of the subregion.
+ /// @param isSubRegion If this RegionNode represents a SubRegion.
+ inline RegionNode(Region* Parent, BasicBlock* Entry, bool isSubRegion = 0)
+ : entry(Entry, isSubRegion), parent(Parent) {}
+
+ /// @brief Get the parent Region of this RegionNode.
+ ///
+ /// The parent Region is the Region this RegionNode belongs to. If for
+ /// example a BasicBlock is element of two Regions, there exist two
+ /// RegionNodes for this BasicBlock. Each with the getParent() function
+ /// pointing to the Region this RegionNode belongs to.
+ ///
+ /// @return Get the parent Region of this RegionNode.
+ inline Region* getParent() const { return parent; }
+
+ /// @brief Get the entry BasicBlock of this RegionNode.
+ ///
+ /// If this RegionNode represents a BasicBlock this is just the BasicBlock
+ /// itself, otherwise we return the entry BasicBlock of the Subregion
+ ///
+ /// @return The entry BasicBlock of this RegionNode.
+ inline BasicBlock* getEntry() const { return entry.getPointer(); }
+
+ /// @brief Get the content of this RegionNode.
+ ///
+ /// This can be either a BasicBlock or a subregion. Before calling getNodeAs()
+ /// check the type of the content with the isSubRegion() function call.
+ ///
+ /// @return The content of this RegionNode.
+ template<class T>
+ inline T* getNodeAs() const;
+
+ /// @brief Is this RegionNode a subregion?
+ ///
+ /// @return True if it contains a subregion. False if it contains a
+ /// BasicBlock.
+ inline bool isSubRegion() const {
+ return entry.getInt();
+ }
+};
+
+/// Print a RegionNode.
+inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node);
+
+template<>
+inline BasicBlock* RegionNode::getNodeAs<BasicBlock>() const {
+ assert(!isSubRegion() && "This is not a BasicBlock RegionNode!");
+ return getEntry();
+}
+
+template<>
+inline Region* RegionNode::getNodeAs<Region>() const {
+ assert(isSubRegion() && "This is not a subregion RegionNode!");
+ return reinterpret_cast<Region*>(const_cast<RegionNode*>(this));
+}
+
+//===----------------------------------------------------------------------===//
+/// @brief A single entry single exit Region.
+///
+/// A Region is a connected subgraph of a control flow graph that has exactly
+/// two connections to the remaining graph. It can be used to analyze or
+/// optimize parts of the control flow graph.
+///
+/// A <em> simple Region </em> is connected to the remaing graph by just two
+/// edges. One edge entering the Region and another one leaving the Region.
+///
+/// An <em> extended Region </em> (or just Region) is a subgraph that can be
+/// transform into a simple Region. The transformation is done by adding
+/// BasicBlocks that merge several entry or exit edges so that after the merge
+/// just one entry and one exit edge exists.
+///
+/// The \e Entry of a Region is the first BasicBlock that is passed after
+/// entering the Region. It is an element of the Region. The entry BasicBlock
+/// dominates all BasicBlocks in the Region.
+///
+/// The \e Exit of a Region is the first BasicBlock that is passed after
+/// leaving the Region. It is not an element of the Region. The exit BasicBlock,
+/// postdominates all BasicBlocks in the Region.
+///
+/// A <em> canonical Region </em> cannot be constructed by combining smaller
+/// Regions.
+///
+/// Region A is the \e parent of Region B, if B is completely contained in A.
+///
+/// Two canonical Regions either do not intersect at all or one is
+/// the parent of the other.
+///
+/// The <em> Program Structure Tree</em> is a graph (V, E) where V is the set of
+/// Regions in the control flow graph and E is the \e parent relation of these
+/// Regions.
+///
+/// Example:
+///
+/// \verbatim
+/// A simple control flow graph, that contains two regions.
+///
+/// 1
+/// / |
+/// 2 |
+/// / \ 3
+/// 4 5 |
+/// | | |
+/// 6 7 8
+/// \ | /
+/// \ |/ Region A: 1 -> 9 {1,2,3,4,5,6,7,8}
+/// 9 Region B: 2 -> 9 {2,4,5,6,7}
+/// \endverbatim
+///
+/// You can obtain more examples by either calling
+///
+/// <tt> "opt -regions -analyze anyprogram.ll" </tt>
+/// or
+/// <tt> "opt -view-regions-only anyprogram.ll" </tt>
+///
+/// on any LLVM file you are interested in.
+///
+/// The first call returns a textual representation of the program structure
+/// tree, the second one creates a graphical representation using graphviz.
+class Region : public RegionNode {
+ friend class RegionInfo;
+ // DO NOT IMPLEMENT
+ Region(const Region &);
+ // DO NOT IMPLEMENT
+ const Region &operator=(const Region &);
+
+ // Information necessary to manage this Region.
+ RegionInfo* RI;
+ DominatorTree *DT;
+
+ // The exit BasicBlock of this region.
+ // (The entry BasicBlock is part of RegionNode)
+ BasicBlock *exit;
+
+ typedef std::vector<Region*> RegionSet;
+
+ // The subregions of this region.
+ RegionSet children;
+
+ typedef std::map<BasicBlock*, RegionNode*> BBNodeMapT;
+
+ // Save the BasicBlock RegionNodes that are element of this Region.
+ mutable BBNodeMapT BBNodeMap;
+
+ /// verifyBBInRegion - Check if a BB is in this Region. This check also works
+ /// if the region is incorrectly built. (EXPENSIVE!)
+ void verifyBBInRegion(BasicBlock* BB) const;
+
+ /// verifyWalk - Walk over all the BBs of the region starting from BB and
+ /// verify that all reachable basic blocks are elements of the region.
+ /// (EXPENSIVE!)
+ void verifyWalk(BasicBlock* BB, std::set<BasicBlock*>* visitedBB) const;
+
+ /// verifyRegionNest - Verify if the region and its children are valid
+ /// regions (EXPENSIVE!)
+ void verifyRegionNest() const;
+
+public:
+ /// @brief Create a new region.
+ ///
+ /// @param Entry The entry basic block of the region.
+ /// @param Exit The exit basic block of the region.
+ /// @param RI The region info object that is managing this region.
+ /// @param DT The dominator tree of the current function.
+ /// @param Parent The surrounding region or NULL if this is a top level
+ /// region.
+ Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RI,
+ DominatorTree *DT, Region *Parent = 0);
+
+ /// Delete the Region and all its subregions.
+ ~Region();
+
+ /// @brief Get the entry BasicBlock of the Region.
+ /// @return The entry BasicBlock of the region.
+ BasicBlock *getEntry() const { return RegionNode::getEntry(); }
+
+ /// @brief Get the exit BasicBlock of the Region.
+ /// @return The exit BasicBlock of the Region, NULL if this is the TopLevel
+ /// Region.
+ BasicBlock *getExit() const { return exit; }
+
+ /// @brief Get the parent of the Region.
+ /// @return The parent of the Region or NULL if this is a top level
+ /// Region.
+ Region *getParent() const { return RegionNode::getParent(); }
+
+ /// @brief Get the RegionNode representing the current Region.
+ /// @return The RegionNode representing the current Region.
+ RegionNode* getNode() const {
+ return const_cast<RegionNode*>(reinterpret_cast<const RegionNode*>(this));
+ }
+
+ /// @brief Get the nesting level of this Region.
+ ///
+ /// An toplevel Region has depth 0.
+ ///
+ /// @return The depth of the region.
+ unsigned getDepth() const;
+
+ /// @brief Is this a simple region?
+ ///
+ /// A region is simple if it has exactly one exit and one entry edge.
+ ///
+ /// @return True if the Region is simple.
+ bool isSimple() const;
+
+ /// @brief Returns the name of the Region.
+ /// @return The Name of the Region.
+ std::string getNameStr() const;
+
+ /// @brief Return the RegionInfo object, that belongs to this Region.
+ RegionInfo *getRegionInfo() const {
+ return RI;
+ }
+
+ /// @brief Print the region.
+ ///
+ /// @param OS The output stream the Region is printed to.
+ /// @param printTree Print also the tree of subregions.
+ /// @param level The indentation level used for printing.
+ void print(raw_ostream& OS, bool printTree = true, unsigned level = 0) const;
+
+ /// @brief Print the region to stderr.
+ void dump() const;
+
+ /// @brief Check if the region contains a BasicBlock.
+ ///
+ /// @param BB The BasicBlock that might be contained in this Region.
+ /// @return True if the block is contained in the region otherwise false.
+ bool contains(const BasicBlock *BB) const;
+
+ /// @brief Check if the region contains another region.
+ ///
+ /// @param SubRegion The region that might be contained in this Region.
+ /// @return True if SubRegion is contained in the region otherwise false.
+ bool contains(const Region *SubRegion) const {
+ // Toplevel Region.
+ if (!getExit())
+ return true;
+
+ return contains(SubRegion->getEntry())
+ && (contains(SubRegion->getExit()) || SubRegion->getExit() == getExit());
+ }
+
+ /// @brief Check if the region contains an Instruction.
+ ///
+ /// @param Inst The Instruction that might be contained in this region.
+ /// @return True if the Instruction is contained in the region otherwise false.
+ bool contains(const Instruction *Inst) const {
+ return contains(Inst->getParent());
+ }
+
+ /// @brief Check if the region contains a loop.
+ ///
+ /// @param L The loop that might be contained in this region.
+ /// @return True if the loop is contained in the region otherwise false.
+ /// In case a NULL pointer is passed to this function the result
+ /// is false, except for the region that describes the whole function.
+ /// In that case true is returned.
+ bool contains(const Loop *L) const;
+
+ /// @brief Get the outermost loop in the region that contains a loop.
+ ///
+ /// Find for a Loop L the outermost loop OuterL that is a parent loop of L
+ /// and is itself contained in the region.
+ ///
+ /// @param L The loop the lookup is started.
+ /// @return The outermost loop in the region, NULL if such a loop does not
+ /// exist or if the region describes the whole function.
+ Loop *outermostLoopInRegion(Loop *L) const;
+
+ /// @brief Get the outermost loop in the region that contains a basic block.
+ ///
+ /// Find for a basic block BB the outermost loop L that contains BB and is
+ /// itself contained in the region.
+ ///
+ /// @param LI A pointer to a LoopInfo analysis.
+ /// @param BB The basic block surrounded by the loop.
+ /// @return The outermost loop in the region, NULL if such a loop does not
+ /// exist or if the region describes the whole function.
+ Loop *outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const;
+
+ /// @brief Get the subregion that starts at a BasicBlock
+ ///
+ /// @param BB The BasicBlock the subregion should start.
+ /// @return The Subregion if available, otherwise NULL.
+ Region* getSubRegionNode(BasicBlock *BB) const;
+
+ /// @brief Get the RegionNode for a BasicBlock
+ ///
+ /// @param BB The BasicBlock at which the RegionNode should start.
+ /// @return If available, the RegionNode that represents the subregion
+ /// starting at BB. If no subregion starts at BB, the RegionNode
+ /// representing BB.
+ RegionNode* getNode(BasicBlock *BB) const;
+
+ /// @brief Get the BasicBlock RegionNode for a BasicBlock
+ ///
+ /// @param BB The BasicBlock for which the RegionNode is requested.
+ /// @return The RegionNode representing the BB.
+ RegionNode* getBBNode(BasicBlock *BB) const;
+
+ /// @brief Add a new subregion to this Region.
+ ///
+ /// @param SubRegion The new subregion that will be added.
+ void addSubRegion(Region *SubRegion);
+
+ /// @brief Remove a subregion from this Region.
+ ///
+ /// The subregion is not deleted, as it will probably be inserted into another
+ /// region.
+ /// @param SubRegion The SubRegion that will be removed.
+ Region *removeSubRegion(Region *SubRegion);
+
+ /// @brief Move all direct child nodes of this Region to another Region.
+ ///
+ /// @param To The Region the child nodes will be transfered to.
+ void transferChildrenTo(Region *To);
+
+ /// @brief Verify if the region is a correct region.
+ ///
+ /// Check if this is a correctly build Region. This is an expensive check, as
+ /// the complete CFG of the Region will be walked.
+ void verifyRegion() const;
+
+ /// @brief Clear the cache for BB RegionNodes.
+ ///
+ /// After calling this function the BasicBlock RegionNodes will be stored at
+ /// different memory locations. RegionNodes obtained before this function is
+ /// called are therefore not comparable to RegionNodes abtained afterwords.
+ void clearNodeCache();
+
+ /// @name Subregion Iterators
+ ///
+ /// These iterators iterator over all subregions of this Region.
+ //@{
+ typedef RegionSet::iterator iterator;
+ typedef RegionSet::const_iterator const_iterator;
+
+ iterator begin() { return children.begin(); }
+ iterator end() { return children.end(); }
+
+ const_iterator begin() const { return children.begin(); }
+ const_iterator end() const { return children.end(); }
+ //@}
+
+ /// @name BasicBlock Iterators
+ ///
+ /// These iterators iterate over all BasicBlock RegionNodes that are
+ /// contained in this Region. The iterator also iterates over BasicBlocks
+ /// that are elements of a subregion of this Region. It is therefore called a
+ /// flat iterator.
+ //@{
+ typedef df_iterator<RegionNode*, SmallPtrSet<RegionNode*, 8>, false,
+ GraphTraits<FlatIt<RegionNode*> > > block_iterator;
+
+ typedef df_iterator<const RegionNode*, SmallPtrSet<const RegionNode*, 8>,
+ false, GraphTraits<FlatIt<const RegionNode*> > >
+ const_block_iterator;
+
+ block_iterator block_begin();
+ block_iterator block_end();
+
+ const_block_iterator block_begin() const;
+ const_block_iterator block_end() const;
+ //@}
+
+ /// @name Element Iterators
+ ///
+ /// These iterators iterate over all BasicBlock and subregion RegionNodes that
+ /// are direct children of this Region. It does not iterate over any
+ /// RegionNodes that are also element of a subregion of this Region.
+ //@{
+ typedef df_iterator<RegionNode*, SmallPtrSet<RegionNode*, 8>, false,
+ GraphTraits<RegionNode*> > element_iterator;
+
+ typedef df_iterator<const RegionNode*, SmallPtrSet<const RegionNode*, 8>,
+ false, GraphTraits<const RegionNode*> >
+ const_element_iterator;
+
+ element_iterator element_begin();
+ element_iterator element_end();
+
+ const_element_iterator element_begin() const;
+ const_element_iterator element_end() const;
+ //@}
+};
+
+//===----------------------------------------------------------------------===//
+/// @brief Analysis that detects all canonical Regions.
+///
+/// The RegionInfo pass detects all canonical regions in a function. The Regions
+/// are connected using the parent relation. This builds a Program Structure
+/// Tree.
+class RegionInfo : public FunctionPass {
+ typedef DenseMap<BasicBlock*,BasicBlock*> BBtoBBMap;
+ typedef DenseMap<BasicBlock*, Region*> BBtoRegionMap;
+ typedef SmallPtrSet<Region*, 4> RegionSet;
+
+ // DO NOT IMPLEMENT
+ RegionInfo(const RegionInfo &);
+ // DO NOT IMPLEMENT
+ const RegionInfo &operator=(const RegionInfo &);
+
+ DominatorTree *DT;
+ PostDominatorTree *PDT;
+ DominanceFrontier *DF;
+
+ /// The top level region.
+ Region *TopLevelRegion;
+
+ /// Map every BB to the smallest region, that contains BB.
+ BBtoRegionMap BBtoRegion;
+
+ // isCommonDomFrontier - Returns true if BB is in the dominance frontier of
+ // entry, because it was inherited from exit. In the other case there is an
+ // edge going from entry to BB without passing exit.
+ bool isCommonDomFrontier(BasicBlock* BB, BasicBlock* entry,
+ BasicBlock* exit) const;
+
+ // isRegion - Check if entry and exit surround a valid region, based on
+ // dominance tree and dominance frontier.
+ bool isRegion(BasicBlock* entry, BasicBlock* exit) const;
+
+ // insertShortCut - Saves a shortcut pointing from entry to exit.
+ // This function may extend this shortcut if possible.
+ void insertShortCut(BasicBlock* entry, BasicBlock* exit,
+ BBtoBBMap* ShortCut) const;
+
+ // getNextPostDom - Returns the next BB that postdominates N, while skipping
+ // all post dominators that cannot finish a canonical region.
+ DomTreeNode *getNextPostDom(DomTreeNode* N, BBtoBBMap *ShortCut) const;
+
+ // isTrivialRegion - A region is trivial, if it contains only one BB.
+ bool isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const;
+
+ // createRegion - Creates a single entry single exit region.
+ Region *createRegion(BasicBlock *entry, BasicBlock *exit);
+
+ // findRegionsWithEntry - Detect all regions starting with bb 'entry'.
+ void findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut);
+
+ // scanForRegions - Detects regions in F.
+ void scanForRegions(Function &F, BBtoBBMap *ShortCut);
+
+ // getTopMostParent - Get the top most parent with the same entry block.
+ Region *getTopMostParent(Region *region);
+
+ // buildRegionsTree - build the region hierarchy after all region detected.
+ void buildRegionsTree(DomTreeNode *N, Region *region);
+
+ // Calculate - detecte all regions in function and build the region tree.
+ void Calculate(Function& F);
+
+ void releaseMemory();
+
+ // updateStatistics - Update statistic about created regions.
+ void updateStatistics(Region *R);
+
+ // isSimple - Check if a region is a simple region with exactly one entry
+ // edge and exactly one exit edge.
+ bool isSimple(Region* R) const;
+
+public:
+ static char ID;
+ explicit RegionInfo();
+
+ ~RegionInfo();
+
+ /// @name FunctionPass interface
+ //@{
+ virtual bool runOnFunction(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual void print(raw_ostream &OS, const Module *) const;
+ virtual void verifyAnalysis() const;
+ //@}
+
+ /// @brief Get the smallest region that contains a BasicBlock.
+ ///
+ /// @param BB The basic block.
+ /// @return The smallest region, that contains BB or NULL, if there is no
+ /// region containing BB.
+ Region *getRegionFor(BasicBlock *BB) const;
+
+ /// @brief A shortcut for getRegionFor().
+ ///
+ /// @param BB The basic block.
+ /// @return The smallest region, that contains BB or NULL, if there is no
+ /// region containing BB.
+ Region *operator[](BasicBlock *BB) const;
+
+ /// @brief Return the exit of the maximal refined region, that starts at a
+ /// BasicBlock.
+ ///
+ /// @param BB The BasicBlock the refined region starts.
+ BasicBlock *getMaxRegionExit(BasicBlock *BB) const;
+
+ /// @brief Find the smallest region that contains two regions.
+ ///
+ /// @param A The first region.
+ /// @param B The second region.
+ /// @return The smallest region containing A and B.
+ Region *getCommonRegion(Region* A, Region *B) const;
+
+ /// @brief Find the smallest region that contains two basic blocks.
+ ///
+ /// @param A The first basic block.
+ /// @param B The second basic block.
+ /// @return The smallest region that contains A and B.
+ Region* getCommonRegion(BasicBlock* A, BasicBlock *B) const {
+ return getCommonRegion(getRegionFor(A), getRegionFor(B));
+ }
+
+ /// @brief Find the smallest region that contains a set of regions.
+ ///
+ /// @param Regions A vector of regions.
+ /// @return The smallest region that contains all regions in Regions.
+ Region* getCommonRegion(SmallVectorImpl<Region*> &Regions) const;
+
+ /// @brief Find the smallest region that contains a set of basic blocks.
+ ///
+ /// @param BBs A vector of basic blocks.
+ /// @return The smallest region that contains all basic blocks in BBS.
+ Region* getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const;
+
+ Region *getTopLevelRegion() const {
+ return TopLevelRegion;
+ }
+
+ /// @brief Clear the Node Cache for all Regions.
+ ///
+ /// @see Region::clearNodeCache()
+ void clearNodeCache() {
+ if (TopLevelRegion)
+ TopLevelRegion->clearNodeCache();
+ }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node) {
+ if (Node.isSubRegion())
+ return OS << Node.getNodeAs<Region>()->getNameStr();
+ else
+ return OS << Node.getNodeAs<BasicBlock>()->getNameStr();
+}
+} // End llvm namespace
+#endif
+
diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h
new file mode 100644
index 000000000000..ced5b528cbb1
--- /dev/null
+++ b/include/llvm/Analysis/RegionIterator.h
@@ -0,0 +1,342 @@
+//===- RegionIterator.h - Iterators to iteratate over Regions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines the iterators to iterate over the elements of a Region.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ANALYSIS_REGION_ITERATOR_H
+#define LLVM_ANALYSIS_REGION_ITERATOR_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+/// @brief Hierachical RegionNode successor iterator.
+///
+/// This iterator iterates over all successors of a RegionNode.
+///
+/// For a BasicBlock RegionNode it skips all BasicBlocks that are not part of
+/// the parent Region. Furthermore for BasicBlocks that start a subregion, a
+/// RegionNode representing the subregion is returned.
+///
+/// For a subregion RegionNode there is just one successor. The RegionNode
+/// representing the exit of the subregion.
+template<class NodeType>
+class RNSuccIterator : public std::iterator<std::forward_iterator_tag,
+ NodeType, ptrdiff_t>
+{
+ typedef std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t> super;
+ // The iterator works in two modes, bb mode or region mode.
+ enum ItMode{
+ // In BB mode it returns all successors of this BasicBlock as its
+ // successors.
+ ItBB,
+ // In region mode there is only one successor, thats the regionnode mapping
+ // to the exit block of the regionnode
+ ItRgBegin, // At the beginning of the regionnode successor.
+ ItRgEnd // At the end of the regionnode successor.
+ };
+
+ // Use two bit to represent the mode iterator.
+ PointerIntPair<NodeType*, 2, enum ItMode> Node;
+
+ // The block successor iterator.
+ succ_iterator BItor;
+
+ // advanceRegionSucc - A region node has only one successor. It reaches end
+ // once we advance it.
+ void advanceRegionSucc() {
+ assert(Node.getInt() == ItRgBegin && "Cannot advance region successor!");
+ Node.setInt(ItRgEnd);
+ }
+
+ NodeType* getNode() const{ return Node.getPointer(); }
+
+ // isRegionMode - Is the current iterator in region mode?
+ bool isRegionMode() const { return Node.getInt() != ItBB; }
+
+ // Get the immediate successor. This function may return a Basic Block
+ // RegionNode or a subregion RegionNode.
+ RegionNode* getISucc(BasicBlock* BB) const {
+ RegionNode *succ;
+ succ = getNode()->getParent()->getNode(BB);
+ assert(succ && "BB not in Region or entered subregion!");
+ return succ;
+ }
+
+ // getRegionSucc - Return the successor basic block of a SubRegion RegionNode.
+ inline BasicBlock* getRegionSucc() const {
+ assert(Node.getInt() == ItRgBegin && "Cannot get the region successor!");
+ return getNode()->template getNodeAs<Region>()->getExit();
+ }
+
+ // isExit - Is this the exit BB of the Region?
+ inline bool isExit(BasicBlock* BB) const {
+ return getNode()->getParent()->getExit() == BB;
+ }
+public:
+ typedef RNSuccIterator<NodeType> Self;
+
+ typedef typename super::pointer pointer;
+
+ /// @brief Create begin iterator of a RegionNode.
+ inline RNSuccIterator(NodeType* node)
+ : Node(node, node->isSubRegion() ? ItRgBegin : ItBB),
+ BItor(succ_begin(node->getEntry())) {
+
+
+ // Skip the exit block
+ if (!isRegionMode())
+ while (succ_end(node->getEntry()) != BItor && isExit(*BItor))
+ ++BItor;
+
+ if (isRegionMode() && isExit(getRegionSucc()))
+ advanceRegionSucc();
+ }
+
+ /// @brief Create an end iterator.
+ inline RNSuccIterator(NodeType* node, bool)
+ : Node(node, node->isSubRegion() ? ItRgEnd : ItBB),
+ BItor(succ_end(node->getEntry())) {}
+
+ inline bool operator==(const Self& x) const {
+ assert(isRegionMode() == x.isRegionMode() && "Broken iterator!");
+ if (isRegionMode())
+ return Node.getInt() == x.Node.getInt();
+ else
+ return BItor == x.BItor;
+ }
+
+ inline bool operator!=(const Self& x) const { return !operator==(x); }
+
+ inline pointer operator*() const {
+ BasicBlock* BB = isRegionMode() ? getRegionSucc() : *BItor;
+ assert(!isExit(BB) && "Iterator out of range!");
+ return getISucc(BB);
+ }
+
+ inline Self& operator++() {
+ if(isRegionMode()) {
+ // The Region only has 1 successor.
+ advanceRegionSucc();
+ } else {
+ // Skip the exit.
+ do
+ ++BItor;
+ while (BItor != succ_end(getNode()->getEntry())
+ && isExit(*BItor));
+ }
+ return *this;
+ }
+
+ inline Self operator++(int) {
+ Self tmp = *this;
+ ++*this;
+ return tmp;
+ }
+
+ inline const Self &operator=(const Self &I) {
+ if (this != &I) {
+ assert(getNode()->getParent() == I.getNode()->getParent()
+ && "Cannot assign iterators of two different regions!");
+ Node = I.Node;
+ BItor = I.BItor;
+ }
+ return *this;
+ }
+};
+
+
+//===----------------------------------------------------------------------===//
+/// @brief Flat RegionNode iterator.
+///
+/// The Flat Region iterator will iterate over all BasicBlock RegionNodes that
+/// are contained in the Region and its subregions. This is close to a virtual
+/// control flow graph of the Region.
+template<class NodeType>
+class RNSuccIterator<FlatIt<NodeType> >
+ : public std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t>
+{
+ typedef std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t> super;
+ NodeType* Node;
+ succ_iterator Itor;
+
+public:
+ typedef RNSuccIterator<FlatIt<NodeType> > Self;
+ typedef typename super::pointer pointer;
+
+ /// @brief Create the iterator from a RegionNode.
+ ///
+ /// Note that the incoming node must be a bb node, otherwise it will trigger
+ /// an assertion when we try to get a BasicBlock.
+ inline RNSuccIterator(NodeType* node) : Node(node),
+ Itor(succ_begin(node->getEntry())) {
+ assert(!Node->isSubRegion()
+ && "Subregion node not allowed in flat iterating mode!");
+ assert(Node->getParent() && "A BB node must have a parent!");
+
+ // Skip the exit block of the iterating region.
+ while (succ_end(Node->getEntry()) != Itor
+ && Node->getParent()->getExit() == *Itor)
+ ++Itor;
+ }
+ /// @brief Create an end iterator
+ inline RNSuccIterator(NodeType* node, bool) : Node(node),
+ Itor(succ_end(node->getEntry())) {
+ assert(!Node->isSubRegion()
+ && "Subregion node not allowed in flat iterating mode!");
+ }
+
+ inline bool operator==(const Self& x) const {
+ assert(Node->getParent() == x.Node->getParent()
+ && "Cannot compare iterators of different regions!");
+
+ return Itor == x.Itor && Node == x.Node;
+ }
+
+ inline bool operator!=(const Self& x) const { return !operator==(x); }
+
+ inline pointer operator*() const {
+ BasicBlock* BB = *Itor;
+
+ // Get the iterating region.
+ Region* Parent = Node->getParent();
+
+ // The only case that the successor reaches out of the region is it reaches
+ // the exit of the region.
+ assert(Parent->getExit() != BB && "iterator out of range!");
+
+ return Parent->getBBNode(BB);
+ }
+
+ inline Self& operator++() {
+ // Skip the exit block of the iterating region.
+ do
+ ++Itor;
+ while (Itor != succ_end(Node->getEntry())
+ && Node->getParent()->getExit() == *Itor);
+
+ return *this;
+ }
+
+ inline Self operator++(int) {
+ Self tmp = *this;
+ ++*this;
+ return tmp;
+ }
+
+ inline const Self &operator=(const Self &I) {
+ if (this != &I) {
+ assert(Node->getParent() == I.Node->getParent()
+ && "Cannot assign iterators to two different regions!");
+ Node = I.Node;
+ Itor = I.Itor;
+ }
+ return *this;
+ }
+};
+
+template<class NodeType>
+inline RNSuccIterator<NodeType> succ_begin(NodeType* Node) {
+ return RNSuccIterator<NodeType>(Node);
+}
+
+template<class NodeType>
+inline RNSuccIterator<NodeType> succ_end(NodeType* Node) {
+ return RNSuccIterator<NodeType>(Node, true);
+}
+
+//===--------------------------------------------------------------------===//
+// RegionNode GraphTraits specialization so the bbs in the region can be
+// iterate by generic graph iterators.
+//
+// NodeT can either be region node or const region node, otherwise child_begin
+// and child_end fail.
+
+#define RegionNodeGraphTraits(NodeT) \
+ template<> struct GraphTraits<NodeT*> { \
+ typedef NodeT NodeType; \
+ typedef RNSuccIterator<NodeType> ChildIteratorType; \
+ static NodeType *getEntryNode(NodeType* N) { return N; } \
+ static inline ChildIteratorType child_begin(NodeType *N) { \
+ return RNSuccIterator<NodeType>(N); \
+ } \
+ static inline ChildIteratorType child_end(NodeType *N) { \
+ return RNSuccIterator<NodeType>(N, true); \
+ } \
+}; \
+template<> struct GraphTraits<FlatIt<NodeT*> > { \
+ typedef NodeT NodeType; \
+ typedef RNSuccIterator<FlatIt<NodeT> > ChildIteratorType; \
+ static NodeType *getEntryNode(NodeType* N) { return N; } \
+ static inline ChildIteratorType child_begin(NodeType *N) { \
+ return RNSuccIterator<FlatIt<NodeType> >(N); \
+ } \
+ static inline ChildIteratorType child_end(NodeType *N) { \
+ return RNSuccIterator<FlatIt<NodeType> >(N, true); \
+ } \
+}
+
+#define RegionGraphTraits(RegionT, NodeT) \
+template<> struct GraphTraits<RegionT*> \
+ : public GraphTraits<NodeT*> { \
+ typedef df_iterator<NodeType*> nodes_iterator; \
+ static NodeType *getEntryNode(RegionT* R) { \
+ return R->getNode(R->getEntry()); \
+ } \
+ static nodes_iterator nodes_begin(RegionT* R) { \
+ return nodes_iterator::begin(getEntryNode(R)); \
+ } \
+ static nodes_iterator nodes_end(RegionT* R) { \
+ return nodes_iterator::end(getEntryNode(R)); \
+ } \
+}; \
+template<> struct GraphTraits<FlatIt<RegionT*> > \
+ : public GraphTraits<FlatIt<NodeT*> > { \
+ typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false, \
+ GraphTraits<FlatIt<NodeType*> > > nodes_iterator; \
+ static NodeType *getEntryNode(RegionT* R) { \
+ return R->getBBNode(R->getEntry()); \
+ } \
+ static nodes_iterator nodes_begin(RegionT* R) { \
+ return nodes_iterator::begin(getEntryNode(R)); \
+ } \
+ static nodes_iterator nodes_end(RegionT* R) { \
+ return nodes_iterator::end(getEntryNode(R)); \
+ } \
+}
+
+RegionNodeGraphTraits(RegionNode);
+RegionNodeGraphTraits(const RegionNode);
+
+RegionGraphTraits(Region, RegionNode);
+RegionGraphTraits(const Region, const RegionNode);
+
+template <> struct GraphTraits<RegionInfo*>
+ : public GraphTraits<FlatIt<RegionNode*> > {
+ typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false,
+ GraphTraits<FlatIt<NodeType*> > > nodes_iterator;
+
+ static NodeType *getEntryNode(RegionInfo *RI) {
+ return GraphTraits<FlatIt<Region*> >::getEntryNode(RI->getTopLevelRegion());
+ }
+ static nodes_iterator nodes_begin(RegionInfo* RI) {
+ return nodes_iterator::begin(getEntryNode(RI));
+ }
+ static nodes_iterator nodes_end(RegionInfo *RI) {
+ return nodes_iterator::end(getEntryNode(RI));
+ }
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/include/llvm/Analysis/RegionPrinter.h b/include/llvm/Analysis/RegionPrinter.h
new file mode 100644
index 000000000000..758748aad9e6
--- /dev/null
+++ b/include/llvm/Analysis/RegionPrinter.h
@@ -0,0 +1,26 @@
+//===-- RegionPrinter.h - Region printer external interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines external functions that can be called to explicitly
+// instantiate the region printer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_REGIONPRINTER_H
+#define LLVM_ANALYSIS_REGIONPRINTER_H
+
+namespace llvm {
+ class FunctionPass;
+ FunctionPass *createRegionViewerPass();
+ FunctionPass *createRegionOnlyViewerPass();
+ FunctionPass *createRegionPrinterPass();
+ FunctionPass *createRegionOnlyPrinterPass();
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 8da3af0c7a56..1fa94e9c311c 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -44,12 +44,17 @@ namespace llvm {
class Loop;
class LoopInfo;
class Operator;
+ class SCEVUnknown;
+ class SCEV;
+ template<> struct FoldingSetTrait<SCEV>;
/// SCEV - This class represents an analyzed expression in the program. These
/// are opaque objects that the client is not allowed to do much with
/// directly.
///
class SCEV : public FoldingSetNode {
+ friend struct FoldingSetTrait<SCEV>;
+
/// FastID - A reference to an Interned FoldingSetNodeID for this node.
/// The ScalarEvolution's BumpPtrAllocator holds the data.
FoldingSetNodeIDRef FastID;
@@ -73,9 +78,6 @@ namespace llvm {
unsigned getSCEVType() const { return SCEVType; }
- /// Profile - FoldingSet support.
- void Profile(FoldingSetNodeID& ID) { ID = FastID; }
-
/// isLoopInvariant - Return true if the value of this SCEV is unchanging in
/// the specified loop.
virtual bool isLoopInvariant(const Loop *L) const = 0;
@@ -125,6 +127,21 @@ namespace llvm {
void dump() const;
};
+ // Specialize FoldingSetTrait for SCEV to avoid needing to compute
+ // temporary FoldingSetNodeID values.
+ template<> struct FoldingSetTrait<SCEV> : DefaultFoldingSetTrait<SCEV> {
+ static void Profile(const SCEV &X, FoldingSetNodeID& ID) {
+ ID = X.FastID;
+ }
+ static bool Equals(const SCEV &X, const FoldingSetNodeID &ID,
+ FoldingSetNodeID &TempID) {
+ return ID == X.FastID;
+ }
+ static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) {
+ return X.FastID.ComputeHash();
+ }
+ };
+
inline raw_ostream &operator<<(raw_ostream &OS, const SCEV &S) {
S.print(OS);
return OS;
@@ -175,6 +192,7 @@ namespace llvm {
friend class SCEVCallbackVH;
friend class SCEVExpander;
+ friend class SCEVUnknown;
/// F - The function we are analyzing.
///
@@ -196,9 +214,14 @@ namespace llvm {
/// counts and things.
SCEVCouldNotCompute CouldNotCompute;
- /// Scalars - This is a cache of the scalars we have analyzed so far.
+ /// ValueExprMapType - The typedef for ValueExprMap.
///
- std::map<SCEVCallbackVH, const SCEV *> Scalars;
+ typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
+ ValueExprMapType;
+
+ /// ValueExprMap - This is a cache of the values we have analyzed so far.
+ ///
+ ValueExprMapType ValueExprMap;
/// BackedgeTakenInfo - Information about the backedge-taken count
/// of a loop. This currently includes an exact count and a maximum count.
@@ -263,7 +286,7 @@ namespace llvm {
/// ForgetSymbolicValue - This looks up computed SCEV values for all
/// instructions that depend on the given instruction and removes them from
- /// the Scalars map if they reference SymName. This is used during PHI
+ /// the ValueExprMap map if they reference SymName. This is used during PHI
/// resolution.
void ForgetSymbolicName(Instruction *I, const SCEV *SymName);
@@ -350,10 +373,11 @@ namespace llvm {
std::pair<BasicBlock *, BasicBlock *>
getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
- /// isImpliedCond - Test whether the condition described by Pred, LHS,
- /// and RHS is true whenever the given Cond value evaluates to true.
- bool isImpliedCond(Value *Cond, ICmpInst::Predicate Pred,
+ /// isImpliedCond - Test whether the condition described by Pred, LHS, and
+ /// RHS is true whenever the given FoundCondValue value evaluates to true.
+ bool isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
bool Inverse);
/// isImpliedCondOperands - Test whether the condition described by Pred,
@@ -659,6 +683,11 @@ namespace llvm {
private:
FoldingSet<SCEV> UniqueSCEVs;
BumpPtrAllocator SCEVAllocator;
+
+ /// FirstUnknown - The head of a linked list of all SCEVUnknown
+ /// values that have been allocated. This is used by releaseMemory
+ /// to locate them all and call their destructors.
+ SCEVUnknown *FirstUnknown;
};
}
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 9501555aacf1..4b02f82035fe 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -18,6 +18,7 @@
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/TargetFolder.h"
+#include "llvm/Support/ValueHandle.h"
#include <set>
namespace llvm {
@@ -31,8 +32,8 @@ namespace llvm {
ScalarEvolution &SE;
std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
InsertedExpressions;
- std::set<Value*> InsertedValues;
- std::set<Value*> InsertedPostIncValues;
+ std::set<AssertingVH<Value> > InsertedValues;
+ std::set<AssertingVH<Value> > InsertedPostIncValues;
/// PostIncLoops - Addrecs referring to any of the given loops are expanded
/// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode
@@ -70,13 +71,18 @@ namespace llvm {
/// clear - Erase the contents of the InsertedExpressions map so that users
/// trying to expand the same expression into multiple BasicBlocks or
/// different places within the same BasicBlock can do so.
- void clear() { InsertedExpressions.clear(); }
+ void clear() {
+ InsertedExpressions.clear();
+ InsertedValues.clear();
+ InsertedPostIncValues.clear();
+ }
/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
/// starts at zero and steps by one on each iteration.
- Value *getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty);
+ PHINode *getOrInsertCanonicalInductionVariable(const Loop *L,
+ const Type *Ty);
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 74242031eddd..4213a287011b 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -202,33 +202,14 @@ namespace llvm {
op_iterator op_begin() const { return Operands; }
op_iterator op_end() const { return Operands + NumOperands; }
- virtual bool isLoopInvariant(const Loop *L) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!getOperand(i)->isLoopInvariant(L)) return false;
- return true;
- }
+ virtual bool isLoopInvariant(const Loop *L) const;
// hasComputableLoopEvolution - N-ary expressions have computable loop
// evolutions iff they have at least one operand that varies with the loop,
// but that all varying operands are computable.
- virtual bool hasComputableLoopEvolution(const Loop *L) const {
- bool HasVarying = false;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!getOperand(i)->isLoopInvariant(L)) {
- if (getOperand(i)->hasComputableLoopEvolution(L))
- HasVarying = true;
- else
- return false;
- }
- return HasVarying;
- }
+ virtual bool hasComputableLoopEvolution(const Loop *L) const;
- virtual bool hasOperand(const SCEV *O) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (O == getOperand(i) || getOperand(i)->hasOperand(O))
- return true;
- return false;
- }
+ virtual bool hasOperand(const SCEV *O) const;
bool dominates(BasicBlock *BB, DominatorTree *DT) const;
@@ -520,15 +501,28 @@ namespace llvm {
/// value, and only represent it as its LLVM Value. This is the "bottom"
/// value for the analysis.
///
- class SCEVUnknown : public SCEV {
+ class SCEVUnknown : public SCEV, private CallbackVH {
friend class ScalarEvolution;
- Value *V;
- SCEVUnknown(const FoldingSetNodeIDRef ID, Value *v) :
- SCEV(ID, scUnknown), V(v) {}
+ // Implement CallbackVH.
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *New);
+
+ /// SE - The parent ScalarEvolution value. This is used to update
+ /// the parent's maps when the value associated with a SCEVUnknown
+ /// is deleted or RAUW'd.
+ ScalarEvolution *SE;
+
+ /// Next - The next pointer in the linked list of all
+ /// SCEVUnknown instances owned by a ScalarEvolution.
+ SCEVUnknown *Next;
+
+ SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
+ ScalarEvolution *se, SCEVUnknown *next) :
+ SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {}
public:
- Value *getValue() const { return V; }
+ Value *getValue() const { return getValPtr(); }
/// isSizeOf, isAlignOf, isOffsetOf - Test whether this is a special
/// constant representing a type size, alignment, or field offset in
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index b9634f04ec4c..7b6026fea0a6 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -77,25 +77,6 @@ namespace llvm {
///
bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0);
- /// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose
- /// it into a base pointer with a constant offset and a number of scaled
- /// symbolic offsets.
- ///
- /// The scaled symbolic offsets (represented by pairs of a Value* and a scale
- /// in the VarIndices vector) are Value*'s that are known to be scaled by the
- /// specified amount, but which may have other unrepresented high bits. As
- /// such, the gep cannot necessarily be reconstructed from its decomposed
- /// form.
- ///
- /// When TargetData is around, this function is capable of analyzing
- /// everything that Value::getUnderlyingObject() can look through. When not,
- /// it just looks through pointer casts.
- ///
- const Value *DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
- SmallVectorImpl<std::pair<const Value*, int64_t> > &VarIndices,
- const TargetData *TD);
-
-
/// FindInsertedValue - Given an aggregrate and an sequence of indices, see if
/// the scalar value indexed is already around as a register, for example if
diff --git a/include/llvm/Assembly/AsmAnnotationWriter.h b/include/llvm/Assembly/AssemblyAnnotationWriter.h
index 6d7572016411..3a65f97a5b50 100644
--- a/include/llvm/Assembly/AsmAnnotationWriter.h
+++ b/include/llvm/Assembly/AssemblyAnnotationWriter.h
@@ -1,4 +1,4 @@
-//===-- AsmAnnotationWriter.h - Itf for annotation .ll files - --*- C++ -*-===//
+//===-- AssemblyAnnotationWriter.h - Annotation .ll files -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,21 +32,26 @@ public:
/// emitFunctionAnnot - This may be implemented to emit a string right before
/// the start of a function.
- virtual void emitFunctionAnnot(const Function *F, raw_ostream &OS) {}
+ virtual void emitFunctionAnnot(const Function *F,
+ formatted_raw_ostream &OS) {}
/// emitBasicBlockStartAnnot - This may be implemented to emit a string right
- /// after the basic block label, but before the first instruction in the block.
- virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, raw_ostream &OS){
+ /// after the basic block label, but before the first instruction in the
+ /// block.
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {
}
/// emitBasicBlockEndAnnot - This may be implemented to emit a string right
/// after the basic block.
- virtual void emitBasicBlockEndAnnot(const BasicBlock *BB, raw_ostream &OS){
+ virtual void emitBasicBlockEndAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {
}
/// emitInstructionAnnot - This may be implemented to emit a string right
/// before an instruction is emitted.
- virtual void emitInstructionAnnot(const Instruction *I, raw_ostream &OS) {}
+ virtual void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) {}
/// printInfoComment - This may be implemented to emit a comment to the
/// right of an instruction or global value.
diff --git a/include/llvm/AutoUpgrade.h b/include/llvm/AutoUpgrade.h
index 0a81c807956f..5ce20b69e2fb 100644
--- a/include/llvm/AutoUpgrade.h
+++ b/include/llvm/AutoUpgrade.h
@@ -16,6 +16,7 @@
namespace llvm {
class Module;
+ class GlobalVariable;
class Function;
class CallInst;
@@ -35,6 +36,10 @@ namespace llvm {
/// so that it can update all calls to the old function.
void UpgradeCallsToIntrinsic(Function* F);
+ /// This checks for global variables which should be upgraded. It returns true
+ /// if it requires upgrading.
+ bool UpgradeGlobalVariable(GlobalVariable *GV);
+
/// This function checks debug info intrinsics. If an intrinsic is invalid
/// then this function simply removes the intrinsic.
void CheckDebugInfoIntrinsics(Module *M);
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index 83a37585fa17..934e764b6587 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -297,7 +297,7 @@ class Archive {
/// its symbol table without reading in any of the archive's members. This
/// reduces both I/O and cpu time in opening the archive if it is to be used
/// solely for symbol lookup (e.g. during linking). The \p Filename must
- /// exist and be an archive file or an exception will be thrown. This form
+ /// exist and be an archive file or an error will be returned. This form
/// of opening the archive is intended for read-only operations that need to
/// locate members via the symbol table for link editing. Since the archve
/// members are not read by this method, the archive will appear empty upon
@@ -306,8 +306,7 @@ class Archive {
/// if this form of opening the archive is used that only the symbol table
/// lookup methods (getSymbolTable, findModuleDefiningSymbol, and
/// findModulesDefiningSymbols) be used.
- /// @throws std::string if an error occurs opening the file
- /// @returns an Archive* that represents the archive file.
+ /// @returns an Archive* that represents the archive file, or null on error.
/// @brief Open an existing archive and load its symbols.
static Archive* OpenAndLoadSymbols(
const sys::Path& Filename, ///< Name of the archive file to open
@@ -319,7 +318,6 @@ class Archive {
/// closes files. It does nothing with the archive file on disk. If you
/// haven't used the writeToDisk method by the time the destructor is
/// called, all changes to the archive will be lost.
- /// @throws std::string if an error occurs
/// @brief Destruct in-memory archive
~Archive();
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 31d513cfb021..bfb3a4e49c51 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -88,7 +88,7 @@ public:
//===--------------------------------------------------------------------===//
void Emit(uint32_t Val, unsigned NumBits) {
- assert(NumBits <= 32 && "Invalid value size!");
+ assert(NumBits && NumBits <= 32 && "Invalid value size!");
assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
CurValue |= Val << CurBit;
if (CurBit + NumBits < 32) {
@@ -277,10 +277,12 @@ private:
switch (Op.getEncoding()) {
default: assert(0 && "Unknown encoding!");
case BitCodeAbbrevOp::Fixed:
- Emit((unsigned)V, (unsigned)Op.getEncodingData());
+ if (Op.getEncodingData())
+ Emit((unsigned)V, (unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::VBR:
- EmitVBR64(V, (unsigned)Op.getEncodingData());
+ if (Op.getEncodingData())
+ EmitVBR64(V, (unsigned)Op.getEncodingData());
break;
case BitCodeAbbrevOp::Char6:
Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index de9b64d4a46c..4f9b783aa97b 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -94,8 +94,7 @@ namespace bitc {
TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa)
TYPE_CODE_PPC_FP128= 15, // PPC LONG DOUBLE (2 doubles)
- TYPE_CODE_METADATA = 16, // METADATA
- TYPE_CODE_UNION = 17 // UNION: [eltty x N]
+ TYPE_CODE_METADATA = 16 // METADATA
};
// The type symbol table only has one code (TST_ENTRY_CODE).
@@ -111,12 +110,20 @@ namespace bitc {
enum MetadataCodes {
METADATA_STRING = 1, // MDSTRING: [values]
- METADATA_NODE = 2, // MDNODE: [n x (type num, value num)]
- METADATA_FN_NODE = 3, // FN_MDNODE: [n x (type num, value num)]
+ // FIXME: Remove NODE in favor of NODE2 in LLVM 3.0
+ METADATA_NODE = 2, // NODE with potentially invalid metadata
+ // FIXME: Remove FN_NODE in favor of FN_NODE2 in LLVM 3.0
+ METADATA_FN_NODE = 3, // FN_NODE with potentially invalid metadata
METADATA_NAME = 4, // STRING: [values]
- METADATA_NAMED_NODE = 5, // NAMEDMDNODE: [n x mdnodes]
+ // FIXME: Remove NAMED_NODE in favor of NAMED_NODE2 in LLVM 3.0
+ METADATA_NAMED_NODE = 5, // NAMED_NODE with potentially invalid metadata
METADATA_KIND = 6, // [n x [id, name]]
- METADATA_ATTACHMENT = 7 // [m x [value, [n x [id, mdnode]]]
+ // FIXME: Remove ATTACHMENT in favor of ATTACHMENT2 in LLVM 3.0
+ METADATA_ATTACHMENT = 7, // ATTACHMENT with potentially invalid metadata
+ METADATA_NODE2 = 8, // NODE2: [n x (type num, value num)]
+ METADATA_FN_NODE2 = 9, // FN_NODE2: [n x (type num, value num)]
+ METADATA_NAMED_NODE2 = 10, // NAMED_NODE2: [n x mdnodes]
+ METADATA_ATTACHMENT2 = 11 // [m x [value, [n x [id, mdnode]]]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
// constant and maintains an implicit current type value.
@@ -224,7 +231,8 @@ namespace bitc {
FUNC_CODE_INST_LOAD = 20, // LOAD: [opty, op, align, vol]
// FIXME: Remove STORE in favor of STORE2 in LLVM 3.0
FUNC_CODE_INST_STORE = 21, // STORE: [valty,val,ptr, align, vol]
- FUNC_CODE_INST_CALL = 22, // CALL: [attr, fnty, fnid, args...]
+ // FIXME: Remove CALL in favor of CALL2 in LLVM 3.0
+ FUNC_CODE_INST_CALL = 22, // CALL with potentially invalid metadata
FUNC_CODE_INST_VAARG = 23, // VAARG: [valistty, valist, instty]
// This store code encodes the pointer type, rather than the value type
// this is so information only available in the pointer type (e.g. address
@@ -242,8 +250,13 @@ namespace bitc {
FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands]
FUNC_CODE_INST_INDIRECTBR = 31, // INDIRECTBR: [opty, op0, op1, ...]
- FUNC_CODE_DEBUG_LOC = 32, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal]
- FUNC_CODE_DEBUG_LOC_AGAIN = 33 // DEBUG_LOC_AGAIN
+ // FIXME: Remove DEBUG_LOC in favor of DEBUG_LOC2 in LLVM 3.0
+ FUNC_CODE_DEBUG_LOC = 32, // DEBUG_LOC with potentially invalid metadata
+ FUNC_CODE_DEBUG_LOC_AGAIN = 33, // DEBUG_LOC_AGAIN
+
+ FUNC_CODE_INST_CALL2 = 34, // CALL2: [attr, fnty, fnid, args...]
+
+ FUNC_CODE_DEBUG_LOC2 = 35 // DEBUG_LOC2: [Line,Col,ScopeVal, IAVal]
};
} // End bitc namespace
} // End llvm namespace
diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/CallGraphSCCPass.h
index e11b9677c74a..7154aa3259d2 100644
--- a/include/llvm/CallGraphSCCPass.h
+++ b/include/llvm/CallGraphSCCPass.h
@@ -33,8 +33,7 @@ class CallGraphSCC;
class CallGraphSCCPass : public Pass {
public:
- explicit CallGraphSCCPass(intptr_t pid) : Pass(PT_CallGraphSCC, pid) {}
- explicit CallGraphSCCPass(void *pid) : Pass(PT_CallGraphSCC, pid) {}
+ explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {}
/// createPrinterPass - Get a pass that prints the Module
/// corresponding to a CallGraph.
@@ -64,7 +63,7 @@ public:
/// Assign pass manager to manager this pass
virtual void assignPassManager(PMStack &PMS,
- PassManagerType PMT =PMT_CallGraphPassManager);
+ PassManagerType PMT);
/// Return what kind of Pass Manager can manage this pass.
virtual PassManagerType getPotentialPassManagerType() const {
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 7ca6c6257f2e..b018603b314e 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -54,6 +54,7 @@ namespace llvm {
class Mangler;
class TargetLoweringObjectFile;
class TargetData;
+ class TargetMachine;
class Twine;
class Type;
@@ -296,7 +297,7 @@ namespace llvm {
MCSymbol *GetBlockAddressSymbol(const BlockAddress *BA) const;
MCSymbol *GetBlockAddressSymbol(const BasicBlock *BB) const;
- //===------------------------------------------------------------------===//
+ //===------------------------------------------------------------------===//
// Emission Helper Routines.
//===------------------------------------------------------------------===//
public:
@@ -327,6 +328,12 @@ namespace llvm {
void EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
const MCSymbol *Lo, unsigned Size) const;
+ /// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+ /// where the size in bytes of the directive is specified by Size and Label
+ /// specifies the label. This implicitly uses .set if it is available.
+ void EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size) const;
+
//===------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===------------------------------------------------------------------===//
@@ -369,6 +376,10 @@ namespace llvm {
/// operands.
virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ /// getISAEncoding - Get the value for DW_AT_APPLE_isa. Zero if no isa
+ /// encoding specified.
+ virtual unsigned getISAEncoding() { return 0; }
+
//===------------------------------------------------------------------===//
// Dwarf Lowering Routines
//===------------------------------------------------------------------===//
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index 2fc03bd41de8..240734fb2e5e 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -12,10 +12,35 @@
#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/DenseMap.h"
namespace llvm {
class LiveInterval;
+ class LiveIntervals;
+ class MachineLoopInfo;
+
+ /// VirtRegAuxInfo - Calculate auxiliary information for a virtual
+ /// register such as its spill weight and allocation hint.
+ class VirtRegAuxInfo {
+ MachineFunction &mf_;
+ LiveIntervals &lis_;
+ const MachineLoopInfo &loops_;
+ DenseMap<unsigned, float> hint_;
+ public:
+ VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis,
+ const MachineLoopInfo &loops) :
+ mf_(mf), lis_(lis), loops_(loops) {}
+
+ /// CalculateRegClass - recompute the register class for reg from its uses.
+ /// Since the register class can affect the allocation hint, this function
+ /// should be called before CalculateWeightAndHint if both are called.
+ void CalculateRegClass(unsigned reg);
+
+ /// CalculateWeightAndHint - (re)compute li's spill weight and allocation
+ /// hint.
+ void CalculateWeightAndHint(LiveInterval &li);
+ };
/// CalculateSpillWeights - Compute spill weights for all virtual register
/// live intervals.
@@ -23,11 +48,11 @@ namespace llvm {
public:
static char ID;
- CalculateSpillWeights() : MachineFunctionPass(&ID) {}
+ CalculateSpillWeights() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &au) const;
- virtual bool runOnMachineFunction(MachineFunction &fn);
+ virtual bool runOnMachineFunction(MachineFunction &fn);
private:
/// Returns true if the given live interval is zero length.
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 7911907e8943..6fb843641dcd 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -275,6 +275,12 @@ public:
return Result;
}
+ /// Version of AllocateStack with extra register to be shadowed.
+ unsigned AllocateStack(unsigned Size, unsigned Align, unsigned ShadowReg) {
+ MarkAllocated(ShadowReg);
+ return AllocateStack(Size, Align);
+ }
+
// HandleByVal - Allocate a stack slot large enough to pass an argument by
// value. The size and alignment information of the argument is encoded in its
// parameter attribute.
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index c49d1edb20f2..f17fe5a146fc 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -77,6 +77,9 @@ public:
/// anywhere in the function.
DenseMap<const AllocaInst*, int> StaticAllocaMap;
+ /// ByValArgFrameIndexMap - Keep track of frame indices for byval arguments.
+ DenseMap<const Argument*, int> ByValArgFrameIndexMap;
+
/// ArgDbgValues - A list of DBG_VALUE instructions created during isel for
/// function arguments that are inserted after scheduling is completed.
SmallVector<MachineInstr*, 8> ArgDbgValues;
@@ -138,6 +141,13 @@ public:
assert(R == 0 && "Already initialized this value register!");
return R = CreateRegs(V->getType());
}
+
+ /// setByValArgumentFrameIndex - Record frame index for the byval
+ /// argument.
+ void setByValArgumentFrameIndex(const Argument *A, int FI);
+
+ /// getByValArgumentFrameIndex - Get frame index for the byval argument.
+ int getByValArgumentFrameIndex(const Argument *A);
};
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 69de5986dd8f..2e23f4e44e32 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -603,7 +603,7 @@ namespace ISD {
/// which do not reference a specific memory location should be less than
/// this value. Those that do must not be less than this value, and can
/// be used with SelectionDAG::getMemIntrinsicNode.
- static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+100;
+ static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+150;
//===--------------------------------------------------------------------===//
/// MemIndexedMode enum - This enum defines the load / store indexed
@@ -633,7 +633,6 @@ namespace ISD {
/// (the result of the load and the result of the base +/- offset
/// computation); a post-indexed store produces one value (the
/// the result of the base +/- offset computation).
- ///
enum MemIndexedMode {
UNINDEXED = 0,
PRE_INC,
@@ -651,10 +650,8 @@ namespace ISD {
/// integer result type.
/// ZEXTLOAD loads the integer operand and zero extends it to a larger
/// integer result type.
- /// EXTLOAD is used for three things: floating point extending loads,
- /// integer extending loads [the top bits are undefined], and vector
- /// extending loads [load into low elt].
- ///
+ /// EXTLOAD is used for two things: floating point extending loads and
+ /// integer extending loads [the top bits are undefined].
enum LoadExtType {
NON_EXTLOAD = 0,
EXTLOAD,
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 8d80efbc5c77..29e689a52145 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -39,7 +39,7 @@ namespace llvm {
/// This class holds information about a machine level values, including
/// definition and use points.
///
- /// Care must be taken in interpreting the def index of the value. The
+ /// Care must be taken in interpreting the def index of the value. The
/// following rules apply:
///
/// If the isDefAccurate() method returns false then def does not contain the
@@ -108,7 +108,7 @@ namespace llvm {
/// For a stack interval, returns the reg which this stack interval was
/// defined from.
- /// For a register interval the behaviour of this method is undefined.
+ /// For a register interval the behaviour of this method is undefined.
unsigned getReg() const { return cr.reg; }
/// For a stack interval, set the defining register.
/// This method should not be called on register intervals as it may lead
@@ -189,7 +189,7 @@ namespace llvm {
}
/// containsRange - Return true if the given range, [S, E), is covered by
- /// this range.
+ /// this range.
bool containsRange(SlotIndex S, SlotIndex E) const {
assert((S < E) && "Backwards interval?");
return (start <= S && S < end) && (start < E && E <= end);
@@ -236,7 +236,7 @@ namespace llvm {
float weight; // weight of this interval
Ranges ranges; // the ranges in which this register is live
VNInfoList valnos; // value#'s
-
+
struct InstrSlots {
enum {
LOAD = 0,
@@ -281,7 +281,7 @@ namespace llvm {
while (I->end <= Pos) ++I;
return I;
}
-
+
void clear() {
valnos.clear();
ranges.clear();
@@ -305,7 +305,7 @@ namespace llvm {
bool containsOneValue() const { return valnos.size() == 1; }
unsigned getNumValNums() const { return (unsigned)valnos.size(); }
-
+
/// getValNumInfo - Returns pointer to the specified val#.
///
inline VNInfo *getValNumInfo(unsigned ValNo) {
@@ -336,6 +336,11 @@ namespace llvm {
return VNI;
}
+ /// RenumberValues - Renumber all values in order of appearance and remove
+ /// unused values.
+ /// Recalculate phi-kill flags in case any phi-def values were removed.
+ void RenumberValues(LiveIntervals &lis);
+
/// isOnlyLROfValNo - Return true if the specified live range is the only
/// one defined by the its val#.
bool isOnlyLROfValNo(const LiveRange *LR) {
@@ -346,7 +351,7 @@ namespace llvm {
}
return true;
}
-
+
/// MergeValueNumberInto - This method is called when two value nubmers
/// are found to be equivalent. This eliminates V1, replacing all
/// LiveRanges with the V1 value number with the V2 value number. This can
@@ -387,7 +392,7 @@ namespace llvm {
/// except for the register of the interval.
void Copy(const LiveInterval &RHS, MachineRegisterInfo *MRI,
VNInfo::Allocator &VNInfoAllocator);
-
+
bool empty() const { return ranges.empty(); }
/// beginIndex - Return the lowest numbered slot covered by interval.
@@ -454,17 +459,19 @@ namespace llvm {
iterator FindLiveRangeContaining(SlotIndex Idx);
/// findDefinedVNInfo - Find the by the specified
- /// index (register interval) or defined
+ /// index (register interval) or defined
VNInfo *findDefinedVNInfoForRegInt(SlotIndex Idx) const;
/// findDefinedVNInfo - Find the VNInfo that's defined by the specified
/// register (stack inteval only).
VNInfo *findDefinedVNInfoForStackInt(unsigned Reg) const;
-
+
/// overlaps - Return true if the intersection of the two live intervals is
/// not empty.
bool overlaps(const LiveInterval& other) const {
+ if (other.empty())
+ return false;
return overlapsFrom(other, other.begin());
}
@@ -514,6 +521,15 @@ namespace llvm {
///
unsigned getSize() const;
+ /// Returns true if the live interval is zero length, i.e. no live ranges
+ /// span instructions. It doesn't pay to spill such an interval.
+ bool isZeroLength() const {
+ for (const_iterator i = begin(), e = end(); i != e; ++i)
+ if (i->end.getPrevIndex() > i->start)
+ return false;
+ return true;
+ }
+
/// isSpillable - Can this interval be spilled?
bool isSpillable() const {
return weight != HUGE_VALF;
@@ -543,6 +559,7 @@ namespace llvm {
Ranges::iterator addRangeFrom(LiveRange LR, Ranges::iterator From);
void extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd);
Ranges::iterator extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStr);
+ void markValNoForDeletion(VNInfo *V);
LiveInterval& operator=(const LiveInterval& rhs); // DO NOT IMPLEMENT
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index c136048c9c7b..2918c3c2abe8 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -42,7 +42,7 @@ namespace llvm {
class TargetInstrInfo;
class TargetRegisterClass;
class VirtRegMap;
-
+
class LiveIntervals : public MachineFunctionPass {
MachineFunction* mf_;
MachineRegisterInfo* mri_;
@@ -68,7 +68,7 @@ namespace llvm {
public:
static char ID; // Pass identification, replacement for typeid
- LiveIntervals() : MachineFunctionPass(&ID) {}
+ LiveIntervals() : MachineFunctionPass(ID) {}
// Calculate the spill weight to assign to a single instruction.
static float getSpillWeight(bool isDef, bool isUse, unsigned loopDepth);
@@ -105,6 +105,12 @@ namespace llvm {
return r2iMap_.count(reg);
}
+ /// isAllocatable - is the physical register reg allocatable in the current
+ /// function?
+ bool isAllocatable(unsigned reg) const {
+ return allocatableRegs_.test(reg);
+ }
+
/// getScaledIntervalSize - get the size of an interval in "units,"
/// where every function is composed of one thousand units. This
/// measure scales properly with empty index slots in the function.
@@ -117,7 +123,7 @@ namespace llvm {
unsigned getFuncInstructionCount() {
return indexes_->getFunctionSize();
}
-
+
/// getApproximateInstructionCount - computes an estimate of the number
/// of instructions in a given LiveInterval.
unsigned getApproximateInstructionCount(LiveInterval& I) {
@@ -149,7 +155,7 @@ namespace llvm {
/// dupInterval - Duplicate a live interval. The caller is responsible for
/// managing the allocated memory.
LiveInterval *dupInterval(LiveInterval *li);
-
+
/// addLiveRangeToEndOfBlock - Given a register and an instruction,
/// adds a live range from that instruction to the end of its MBB.
LiveRange addLiveRangeToEndOfBlock(unsigned reg,
@@ -181,7 +187,7 @@ namespace llvm {
SlotIndex getInstructionIndex(const MachineInstr *instr) const {
return indexes_->getInstructionIndex(instr);
}
-
+
/// Returns the instruction associated with the given index.
MachineInstr* getInstructionFromIndex(SlotIndex index) const {
return indexes_->getInstructionFromIndex(index);
@@ -190,12 +196,32 @@ namespace llvm {
/// Return the first index in the given basic block.
SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
return indexes_->getMBBStartIdx(mbb);
- }
+ }
/// Return the last index in the given basic block.
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
return indexes_->getMBBEndIdx(mbb);
- }
+ }
+
+ bool isLiveInToMBB(const LiveInterval &li,
+ const MachineBasicBlock *mbb) const {
+ return li.liveAt(getMBBStartIdx(mbb));
+ }
+
+ LiveRange* findEnteringRange(LiveInterval &li,
+ const MachineBasicBlock *mbb) {
+ return li.getLiveRangeContaining(getMBBStartIdx(mbb));
+ }
+
+ bool isLiveOutOfMBB(const LiveInterval &li,
+ const MachineBasicBlock *mbb) const {
+ return li.liveAt(getMBBEndIdx(mbb).getPrevSlot());
+ }
+
+ LiveRange* findExitingRange(LiveInterval &li,
+ const MachineBasicBlock *mbb) {
+ return li.getLiveRangeContaining(getMBBEndIdx(mbb).getPrevSlot());
+ }
MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
return indexes_->getMBBFromIndex(index);
@@ -217,6 +243,10 @@ namespace llvm {
indexes_->replaceMachineInstrInMaps(MI, NewMI);
}
+ void InsertMBBInMaps(MachineBasicBlock *MBB) {
+ indexes_->insertMBBInMaps(MBB);
+ }
+
bool findLiveInMBBs(SlotIndex Start, SlotIndex End,
SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
return indexes_->findLiveInMBBs(Start, End, MBBs);
@@ -276,7 +306,7 @@ namespace llvm {
/// within a single basic block.
bool intervalIsInOneMBB(const LiveInterval &li) const;
- private:
+ private:
/// computeIntervals - Compute live intervals.
void computeIntervals();
@@ -290,7 +320,7 @@ namespace llvm {
/// isPartialRedef - Return true if the specified def at the specific index
/// is partially re-defining the specified live interval. A common case of
- /// this is a definition of the sub-register.
+ /// this is a definition of the sub-register.
bool isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
LiveInterval &interval);
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h
index c6af6a1f89ce..ad984db1899e 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -39,7 +39,7 @@ namespace llvm {
public:
static char ID; // Pass identification, replacement for typeid
- LiveStacks() : MachineFunctionPass(&ID) {}
+ LiveStacks() : MachineFunctionPass(ID) {}
typedef SS2IntervalMap::iterator iterator;
typedef SS2IntervalMap::const_iterator const_iterator;
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index fc5ea6f968bd..c8182e073b9c 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -46,7 +46,7 @@ class TargetRegisterInfo;
class LiveVariables : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LiveVariables() : MachineFunctionPass(&ID) {}
+ LiveVariables() : MachineFunctionPass(ID) {}
/// VarInfo - This represents the regions where a virtual register is live in
/// the program. We represent this with three different pieces of
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 9471316d25d7..dca65ef6d407 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_MACHINEFRAMEINFO_H
#include "llvm/ADT/SmallVector.h"
+//#include "llvm/ADT/IndexedMap.h"
#include "llvm/System/DataTypes.h"
#include <cassert>
#include <vector>
@@ -30,15 +31,15 @@ class TargetFrameInfo;
class BitVector;
/// The CalleeSavedInfo class tracks the information need to locate where a
-/// callee saved register in the current frame.
+/// callee saved register is in the current frame.
class CalleeSavedInfo {
unsigned Reg;
int FrameIdx;
-
+
public:
explicit CalleeSavedInfo(unsigned R, int FI = 0)
: Reg(R), FrameIdx(FI) {}
-
+
// Accessors.
unsigned getReg() const { return Reg; }
int getFrameIdx() const { return FrameIdx; }
@@ -81,7 +82,7 @@ class MachineFrameInfo {
// SPOffset - The offset of this object from the stack pointer on entry to
// the function. This field has no meaning for a variable sized element.
int64_t SPOffset;
-
+
// The size of this object on the stack. 0 means a variable sized object,
// ~0ULL means a dead object.
uint64_t Size;
@@ -94,13 +95,23 @@ class MachineFrameInfo {
// default, fixed objects are immutable unless marked otherwise.
bool isImmutable;
- // isSpillSlot - If true, the stack object is used as spill slot. It
+ // isSpillSlot - If true the stack object is used as spill slot. It
// cannot alias any other memory objects.
bool isSpillSlot;
- StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, bool isSS)
+ // MayNeedSP - If true the stack object triggered the creation of the stack
+ // protector. We should allocate this object right after the stack
+ // protector.
+ bool MayNeedSP;
+
+ // PreAllocated - If true, the object was mapped into the local frame
+ // block and doesn't need additional handling for allocation beyond that.
+ bool PreAllocated;
+
+ StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
+ bool isSS, bool NSP)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
- isSpillSlot(isSS) {}
+ isSpillSlot(isSS), MayNeedSP(NSP), PreAllocated(false) {}
};
/// Objects - The list of stack objects allocated...
@@ -132,7 +143,7 @@ class MachineFrameInfo {
/// to be allocated on entry to the function.
///
uint64_t StackSize;
-
+
/// OffsetAdjustment - The amount that a frame offset needs to be adjusted to
/// have the actual offset from the stack/frame pointer. The exact usage of
/// this is target-dependent, but it is typically used to adjust between
@@ -143,10 +154,10 @@ class MachineFrameInfo {
/// TargetRegisterInfo::getFrameIndexOffset); when generating code, the
/// corresponding adjustments are performed directly.
int OffsetAdjustment;
-
- /// MaxAlignment - The prolog/epilog code inserter may process objects
+
+ /// MaxAlignment - The prolog/epilog code inserter may process objects
/// that require greater alignment than the default alignment the target
- /// provides. To handle this, MaxAlignment is set to the maximum alignment
+ /// provides. To handle this, MaxAlignment is set to the maximum alignment
/// needed by the objects on the current frame. If this is greater than the
/// native alignment maintained by the compiler, dynamic alignment code will
/// be needed.
@@ -171,7 +182,7 @@ class MachineFrameInfo {
/// insertion.
///
unsigned MaxCallFrameSize;
-
+
/// CSInfo - The prolog/epilog code inserter fills in this vector with each
/// callee saved register saved in the frame. Beyond its use by the prolog/
/// epilog code inserter, this data used for debug info and exception
@@ -189,8 +200,24 @@ class MachineFrameInfo {
///
const TargetFrameInfo &TFI;
+ /// LocalFrameObjects - References to frame indices which are mapped
+ /// into the local frame allocation block. <FrameIdx, LocalOffset>
+ SmallVector<std::pair<int, int64_t>, 32> LocalFrameObjects;
+
+ /// LocalFrameSize - Size of the pre-allocated local frame block.
+ int64_t LocalFrameSize;
+
+ /// Required alignment of the local object blob, which is the strictest
+ /// alignment of any object in it.
+ unsigned LocalFrameMaxAlign;
+
+ /// Whether the local object blob needs to be allocated together. If not,
+ /// PEI should ignore the isPreAllocated flags on the stack objects and
+ /// just allocate them normally.
+ bool UseLocalStackAllocationBlock;
+
public:
- explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) {
+ explicit MachineFrameInfo(const TargetFrameInfo &tfi) : TFI(tfi) {
StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
HasVarSizedObjects = false;
FrameAddressTaken = false;
@@ -200,6 +227,9 @@ public:
StackProtectorIdx = -1;
MaxCallFrameSize = 0;
CSIValid = false;
+ LocalFrameSize = 0;
+ LocalFrameMaxAlign = 0;
+ UseLocalStackAllocationBlock = false;
}
/// hasStackObjects - Return true if there are any stack objects in this
@@ -225,8 +255,8 @@ public:
bool isFrameAddressTaken() const { return FrameAddressTaken; }
void setFrameAddressIsTaken(bool T) { FrameAddressTaken = T; }
- /// isReturnAddressTaken - This method may be called any time after instruction
- /// selection is complete to determine if there is a call to
+ /// isReturnAddressTaken - This method may be called any time after
+ /// instruction selection is complete to determine if there is a call to
/// \@llvm.returnaddress in this function.
bool isReturnAddressTaken() const { return ReturnAddressTaken; }
void setReturnAddressIsTaken(bool s) { ReturnAddressTaken = s; }
@@ -239,13 +269,64 @@ public:
///
int getObjectIndexEnd() const { return (int)Objects.size()-NumFixedObjects; }
- /// getNumFixedObjects() - Return the number of fixed objects.
+ /// getNumFixedObjects - Return the number of fixed objects.
unsigned getNumFixedObjects() const { return NumFixedObjects; }
- /// getNumObjects() - Return the number of objects.
+ /// getNumObjects - Return the number of objects.
///
unsigned getNumObjects() const { return Objects.size(); }
+ /// mapLocalFrameObject - Map a frame index into the local object block
+ void mapLocalFrameObject(int ObjectIndex, int64_t Offset) {
+ LocalFrameObjects.push_back(std::pair<int, int64_t>(ObjectIndex, Offset));
+ Objects[ObjectIndex + NumFixedObjects].PreAllocated = true;
+ }
+
+ /// getLocalFrameObjectMap - Get the local offset mapping for a for an object
+ std::pair<int, int64_t> getLocalFrameObjectMap(int i) {
+ assert (i >= 0 && (unsigned)i < LocalFrameObjects.size() &&
+ "Invalid local object reference!");
+ return LocalFrameObjects[i];
+ }
+
+ /// getLocalFrameObjectCount - Return the number of objects allocated into
+ /// the local object block.
+ int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); }
+
+ /// setLocalFrameSize - Set the size of the local object blob.
+ void setLocalFrameSize(int64_t sz) { LocalFrameSize = sz; }
+
+ /// getLocalFrameSize - Get the size of the local object blob.
+ int64_t getLocalFrameSize() const { return LocalFrameSize; }
+
+ /// setLocalFrameMaxAlign - Required alignment of the local object blob,
+ /// which is the strictest alignment of any object in it.
+ void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; }
+
+ /// getLocalFrameMaxAlign - Return the required alignment of the local
+ /// object blob.
+ unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; }
+
+ /// getUseLocalStackAllocationBlock - Get whether the local allocation blob
+ /// should be allocated together or let PEI allocate the locals in it
+ /// directly.
+ bool getUseLocalStackAllocationBlock() {return UseLocalStackAllocationBlock;}
+
+ /// setUseLocalStackAllocationBlock - Set whether the local allocation blob
+ /// should be allocated together or let PEI allocate the locals in it
+ /// directly.
+ void setUseLocalStackAllocationBlock(bool v) {
+ UseLocalStackAllocationBlock = v;
+ }
+
+ /// isObjectPreAllocated - Return true if the object was pre-allocated into
+ /// the local block.
+ bool isObjectPreAllocated(int ObjectIdx) const {
+ assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+ "Invalid Object Idx!");
+ return Objects[ObjectIdx+NumFixedObjects].PreAllocated;
+ }
+
/// getObjectSize - Return the size of the specified object.
///
int64_t getObjectSize(int ObjectIdx) const {
@@ -276,6 +357,14 @@ public:
MaxAlignment = std::max(MaxAlignment, Align);
}
+ /// NeedsStackProtector - Returns true if the object may need stack
+ /// protectors.
+ bool MayNeedStackProtector(int ObjectIdx) const {
+ assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+ "Invalid Object Idx!");
+ return Objects[ObjectIdx+NumFixedObjects].MayNeedSP;
+ }
+
/// getObjectOffset - Return the assigned stack offset of the specified object
/// from the incoming stack pointer.
///
@@ -307,21 +396,21 @@ public:
/// setStackSize - Set the size of the stack...
///
void setStackSize(uint64_t Size) { StackSize = Size; }
-
+
/// getOffsetAdjustment - Return the correction for frame offsets.
///
int getOffsetAdjustment() const { return OffsetAdjustment; }
-
+
/// setOffsetAdjustment - Set the correction for frame offsets.
///
void setOffsetAdjustment(int Adj) { OffsetAdjustment = Adj; }
- /// getMaxAlignment - Return the alignment in bytes that this function must be
- /// aligned to, which is greater than the default stack alignment provided by
+ /// getMaxAlignment - Return the alignment in bytes that this function must be
+ /// aligned to, which is greater than the default stack alignment provided by
/// the target.
///
unsigned getMaxAlignment() const { return MaxAlignment; }
-
+
/// setMaxAlignment - Set the preferred alignment.
///
void setMaxAlignment(unsigned Align) { MaxAlignment = Align; }
@@ -350,8 +439,8 @@ public:
/// index with a negative value.
///
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable);
-
-
+
+
/// isFixedObjectIndex - Returns true if the specified index corresponds to a
/// fixed stack object.
bool isFixedObjectIndex(int ObjectIdx) const {
@@ -382,25 +471,26 @@ public:
return Objects[ObjectIdx+NumFixedObjects].Size == ~0ULL;
}
- /// CreateStackObject - Create a new statically sized stack object,
- /// returning a nonnegative identifier to represent it.
+ /// CreateStackObject - Create a new statically sized stack object, returning
+ /// a nonnegative identifier to represent it.
///
- int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS) {
+ int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
+ bool MayNeedSP = false) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
- Objects.push_back(StackObject(Size, Alignment, 0, false, isSS));
- int Index = (int)Objects.size()-NumFixedObjects-1;
+ Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP));
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
MaxAlignment = std::max(MaxAlignment, Alignment);
return Index;
}
- /// CreateSpillStackObject - Create a new statically sized stack
- /// object that represents a spill slot, returning a nonnegative
- /// identifier to represent it.
+ /// CreateSpillStackObject - Create a new statically sized stack object that
+ /// represents a spill slot, returning a nonnegative identifier to represent
+ /// it.
///
int CreateSpillStackObject(uint64_t Size, unsigned Alignment) {
- CreateStackObject(Size, Alignment, true);
- int Index = (int)Objects.size()-NumFixedObjects-1;
+ CreateStackObject(Size, Alignment, true, false);
+ int Index = (int)Objects.size() - NumFixedObjects - 1;
MaxAlignment = std::max(MaxAlignment, Alignment);
return Index;
}
@@ -417,9 +507,10 @@ public:
/// variable sized object is created, whether or not the index returned is
/// actually used.
///
- int CreateVariableSizedObject() {
+ int CreateVariableSizedObject(unsigned Alignment) {
HasVarSizedObjects = true;
- Objects.push_back(StackObject(0, 1, 0, false, false));
+ Objects.push_back(StackObject(0, Alignment, 0, false, false, true));
+ MaxAlignment = std::max(MaxAlignment, Alignment);
return (int)Objects.size()-NumFixedObjects-1;
}
@@ -431,7 +522,7 @@ public:
/// setCalleeSavedInfo - Used by prolog/epilog inserter to set the function's
/// callee saved information.
- void setCalleeSavedInfo(const std::vector<CalleeSavedInfo> &CSI) {
+ void setCalleeSavedInfo(const std::vector<CalleeSavedInfo> &CSI) {
CSInfo = CSI;
}
@@ -452,7 +543,7 @@ public:
BitVector getPristineRegs(const MachineBasicBlock *MBB) const;
/// print - Used by the MachineFunction printer to print information about
- /// stack objects. Implemented in MachineFunction.cpp
+ /// stack objects. Implemented in MachineFunction.cpp
///
void print(const MachineFunction &MF, raw_ostream &OS) const;
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 409d13ee3fc5..5bb453dd50fa 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -266,7 +266,7 @@ public:
/// verify - Run the current MachineFunction through the machine code
/// verifier, useful for debugger use.
- void verify(Pass *p=NULL, bool allowDoubleDefs=false) const;
+ void verify(Pass *p=NULL) const;
// Provide accessors for the MachineBasicBlock list...
typedef BasicBlockListType::iterator iterator;
diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h
index 685e86824c31..b7bf0a36c447 100644
--- a/include/llvm/CodeGen/MachineFunctionPass.h
+++ b/include/llvm/CodeGen/MachineFunctionPass.h
@@ -31,8 +31,7 @@ class MachineFunction;
/// override runOnMachineFunction.
class MachineFunctionPass : public FunctionPass {
protected:
- explicit MachineFunctionPass(intptr_t ID) : FunctionPass(ID) {}
- explicit MachineFunctionPass(void *ID) : FunctionPass(ID) {}
+ explicit MachineFunctionPass(char &ID) : FunctionPass(ID) {}
/// runOnMachineFunction - This method must be overloaded to perform the
/// desired machine code transformation or analysis.
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index e67b2dda1141..f843196105dd 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -201,12 +201,14 @@ public:
/// isLabel - Returns true if the MachineInstr represents a label.
///
bool isLabel() const {
- return getOpcode() == TargetOpcode::DBG_LABEL ||
+ return getOpcode() == TargetOpcode::PROLOG_LABEL ||
getOpcode() == TargetOpcode::EH_LABEL ||
getOpcode() == TargetOpcode::GC_LABEL;
}
- bool isDebugLabel() const { return getOpcode() == TargetOpcode::DBG_LABEL; }
+ bool isPrologLabel() const {
+ return getOpcode() == TargetOpcode::PROLOG_LABEL;
+ }
bool isEHLabel() const { return getOpcode() == TargetOpcode::EH_LABEL; }
bool isGCLabel() const { return getOpcode() == TargetOpcode::GC_LABEL; }
bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; }
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index 3b3e31e02afa..9760eba7b86e 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -67,7 +67,7 @@ class MachineLoopInfo : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- MachineLoopInfo() : MachineFunctionPass(&ID) {}
+ MachineLoopInfo() : MachineFunctionPass(ID) {}
LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; }
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 50e38b447feb..0e719c86c18e 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -344,7 +344,7 @@ public:
VariableDbgInfo.push_back(std::make_pair(N, std::make_pair(Slot, Loc)));
}
- VariableDbgInfoMapTy &getVariableDbgInfo();
+ VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfo; }
}; // End class MachineModuleInfo
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 7445ec7c92a2..4762a39cc669 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -30,55 +30,55 @@ namespace llvm {
/// createUnreachableBlockEliminationPass - The LLVM code generator does not
/// work well with unreachable basic blocks (what live ranges make sense for a
/// block that cannot be reached?). As such, a code generator should either
- /// not instruction select unreachable blocks, or it can run this pass as it's
+ /// not instruction select unreachable blocks, or run this pass as its
/// last LLVM modifying pass to clean up blocks that are not reachable from
/// the entry block.
FunctionPass *createUnreachableBlockEliminationPass();
/// MachineFunctionPrinter pass - This pass prints out the machine function to
- /// the given stream, as a debugging tool.
+ /// the given stream as a debugging tool.
MachineFunctionPass *
createMachineFunctionPrinterPass(raw_ostream &OS,
const std::string &Banner ="");
/// MachineLoopInfo pass - This pass is a loop analysis pass.
- ///
- extern const PassInfo *const MachineLoopInfoID;
+ ///
+ extern char &MachineLoopInfoID;
/// MachineDominators pass - This pass is a machine dominators analysis pass.
- ///
- extern const PassInfo *const MachineDominatorsID;
+ ///
+ extern char &MachineDominatorsID;
/// PHIElimination pass - This pass eliminates machine instruction PHI nodes
/// by inserting copy instructions. This destroys SSA information, but is the
/// desired input for some register allocators. This pass is "required" by
/// these register allocator like this: AU.addRequiredID(PHIEliminationID);
///
- extern const PassInfo *const PHIEliminationID;
-
+ extern char &PHIEliminationID;
+
/// StrongPHIElimination pass - This pass eliminates machine instruction PHI
/// nodes by inserting copy instructions. This destroys SSA information, but
/// is the desired input for some register allocators. This pass is
/// "required" by these register allocator like this:
/// AU.addRequiredID(PHIEliminationID);
/// This pass is still in development
- extern const PassInfo *const StrongPHIEliminationID;
+ extern char &StrongPHIEliminationID;
- extern const PassInfo *const PreAllocSplittingID;
+ extern char &PreAllocSplittingID;
/// SimpleRegisterCoalescing pass. Aggressively coalesces every register
/// copy it can.
///
- extern const PassInfo *const SimpleRegisterCoalescingID;
+ extern char &SimpleRegisterCoalescingID;
/// TwoAddressInstruction pass - This pass reduces two-address instructions to
/// use two operands. This destroys SSA information but it is desired by
/// register allocators.
- extern const PassInfo *const TwoAddressInstructionPassID;
+ extern char &TwoAddressInstructionPassID;
/// UnreachableMachineBlockElimination pass - This pass removes unreachable
/// machine basic blocks.
- extern const PassInfo *const UnreachableMachineBlockElimID;
+ extern char &UnreachableMachineBlockElimID;
/// DeadMachineInstructionElim pass - This pass removes dead machine
/// instructions.
@@ -114,7 +114,7 @@ namespace llvm {
/// and eliminates abstract frame references.
///
FunctionPass *createPrologEpilogCodeInserter();
-
+
/// LowerSubregs Pass - This pass lowers subregs to register-register copies
/// which yields suboptimal, but correct code if the register allocator
/// cannot coalesce all subreg operations during allocation.
@@ -145,36 +145,36 @@ namespace llvm {
/// IntrinsicLowering Pass - Performs target-independent LLVM IR
/// transformations for highly portable strategies.
FunctionPass *createGCLoweringPass();
-
+
/// MachineCodeAnalysis Pass - Target-independent pass to mark safe points in
/// machine code. Must be added very late during code generation, just prior
/// to output, and importantly after all CFG transformations (such as branch
/// folding).
FunctionPass *createGCMachineCodeAnalysisPass();
-
+
/// Deleter Pass - Releases GC metadata.
- ///
+ ///
FunctionPass *createGCInfoDeleter();
-
+
/// Creates a pass to print GC metadata.
- ///
+ ///
FunctionPass *createGCInfoPrinter(raw_ostream &OS);
-
+
/// createMachineCSEPass - This pass performs global CSE on machine
/// instructions.
FunctionPass *createMachineCSEPass();
/// createMachineLICMPass - This pass performs LICM on machine instructions.
- ///
+ ///
FunctionPass *createMachineLICMPass(bool PreRegAlloc = true);
/// createMachineSinkingPass - This pass performs sinking on machine
/// instructions.
FunctionPass *createMachineSinkingPass();
- /// createOptimizeExtsPass - This pass performs sign / zero extension
- /// optimization by increasing uses of extended values.
- FunctionPass *createOptimizeExtsPass();
+ /// createPeepholeOptimizerPass - This pass performs peephole optimizations -
+ /// like extension and comparison eliminations.
+ FunctionPass *createPeepholeOptimizerPass();
/// createOptimizePHIsPass - This pass optimizes machine instruction PHIs
/// to take advantage of opportunities created during DAG legalization.
@@ -188,19 +188,23 @@ namespace llvm {
/// createMachineVerifierPass - This pass verifies cenerated machine code
/// instructions for correctness.
- ///
- /// @param allowDoubleDefs ignore double definitions of
- /// registers. Useful before LiveVariables has run.
- FunctionPass *createMachineVerifierPass(bool allowDoubleDefs);
+ FunctionPass *createMachineVerifierPass();
/// createDwarfEHPass - This pass mulches exception handling code into a form
/// adapted to code generation. Required if using dwarf exception handling.
- FunctionPass *createDwarfEHPass(const TargetMachine *tm, bool fast);
+ FunctionPass *createDwarfEHPass(const TargetMachine *tm);
/// createSjLjEHPass - This pass adapts exception handling code to use
/// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow.
FunctionPass *createSjLjEHPass(const TargetLowering *tli);
+ /// createLocalStackSlotAllocationPass - This pass assigns local frame
+ /// indices to stack slots relative to one another and allocates
+ /// base registers to access them when it is estimated by the target to
+ /// be out of range of normal frame pointer or stack pointer index
+ /// addressing.
+ FunctionPass *createLocalStackSlotAllocationPass();
+
} // End llvm namespace
#endif
diff --git a/include/llvm/CodeGen/ProcessImplicitDefs.h b/include/llvm/CodeGen/ProcessImplicitDefs.h
index 30477b9b80b8..1d743c1cba24 100644
--- a/include/llvm/CodeGen/ProcessImplicitDefs.h
+++ b/include/llvm/CodeGen/ProcessImplicitDefs.h
@@ -31,7 +31,7 @@ namespace llvm {
public:
static char ID;
- ProcessImplicitDefs() : MachineFunctionPass(&ID) {}
+ ProcessImplicitDefs() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &au) const;
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index 14c33e218755..96573dd5d8b1 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -78,12 +78,19 @@ ScheduleDAGSDNodes *createTDRRListDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *createSourceListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel);
-/// createHybridListDAGScheduler - This creates a bottom up hybrid register
-/// usage reduction list scheduler that make use of latency information to
-/// avoid stalls for long latency instructions.
+/// createHybridListDAGScheduler - This creates a bottom up register pressure
+/// aware list scheduler that make use of latency information to avoid stalls
+/// for long latency instructions in low register pressure mode. In high
+/// register pressure mode it schedules to reduce register pressure.
ScheduleDAGSDNodes *createHybridListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level);
+/// createILPListDAGScheduler - This creates a bottom up register pressure
+/// aware list scheduler that tries to increase instruction level parallelism
+/// in low register pressure mode. In high register pressure mode it schedules
+/// to reduce register pressure.
+ScheduleDAGSDNodes *createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level);
/// createTDListDAGScheduler - This creates a top-down list scheduler with
/// a hazard recognizer.
ScheduleDAGSDNodes *createTDListDAGScheduler(SelectionDAGISel *IS,
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index de49d184131d..7723fa00e90d 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -977,10 +977,6 @@ public:
/// been verified as a debug information descriptor.
bool isVerifiedDebugInfoDesc(SDValue Op) const;
- /// getShuffleScalarElt - Returns the scalar element that will make up the ith
- /// element of the result of the vector shuffle.
- SDValue getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned Idx);
-
/// UnrollVectorOp - Utility function used by legalize and lowering to
/// "unroll" a vector operation by splitting out the scalars and operating
/// on each element individually. If the ResNE is 0, fully unroll the vector
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index f1f047b44ed2..88044c7242c9 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -128,7 +128,8 @@ namespace llvm {
friend class SlotIndexes;
friend struct DenseMapInfo<SlotIndex>;
- private:
+ enum Slot { LOAD, USE, DEF, STORE, NUM };
+
static const unsigned PHI_BIT = 1 << 2;
PointerIntPair<IndexListEntry*, 3, unsigned> lie;
@@ -146,6 +147,11 @@ namespace llvm {
return entry().getIndex() | getSlot();
}
+ /// Returns the slot for this SlotIndex.
+ Slot getSlot() const {
+ return static_cast<Slot>(lie.getInt() & ~PHI_BIT);
+ }
+
static inline unsigned getHashValue(const SlotIndex &v) {
IndexListEntry *ptrVal = &v.entry();
return (unsigned((intptr_t)ptrVal) >> 4) ^
@@ -153,11 +159,6 @@ namespace llvm {
}
public:
-
- // FIXME: Ugh. This is public because LiveIntervalAnalysis is still using it
- // for some spill weight stuff. Fix that, then make this private.
- enum Slot { LOAD, USE, DEF, STORE, NUM };
-
static inline SlotIndex getEmptyKey() {
return SlotIndex(IndexListEntry::getEmptyKeyEntry(), 0);
}
@@ -235,16 +236,31 @@ namespace llvm {
return other.getIndex() - getIndex();
}
- /// Returns the slot for this SlotIndex.
- Slot getSlot() const {
- return static_cast<Slot>(lie.getInt() & ~PHI_BIT);
- }
-
/// Returns the state of the PHI bit.
bool isPHI() const {
return lie.getInt() & PHI_BIT;
}
+ /// isLoad - Return true if this is a LOAD slot.
+ bool isLoad() const {
+ return getSlot() == LOAD;
+ }
+
+ /// isDef - Return true if this is a DEF slot.
+ bool isDef() const {
+ return getSlot() == DEF;
+ }
+
+ /// isUse - Return true if this is a USE slot.
+ bool isUse() const {
+ return getSlot() == USE;
+ }
+
+ /// isStore - Return true if this is a STORE slot.
+ bool isStore() const {
+ return getSlot() == STORE;
+ }
+
/// Returns the base index for associated with this index. The base index
/// is the one associated with the LOAD slot for the instruction pointed to
/// by this index.
@@ -475,7 +491,7 @@ namespace llvm {
public:
static char ID;
- SlotIndexes() : MachineFunctionPass(&ID), indexListHead(0) {}
+ SlotIndexes() : MachineFunctionPass(ID), indexListHead(0) {}
virtual void getAnalysisUsage(AnalysisUsage &au) const;
virtual void releaseMemory();
@@ -494,6 +510,11 @@ namespace llvm {
return SlotIndex(front(), 0);
}
+ /// Returns the base index of the last slot in this analysis.
+ SlotIndex getLastIndex() {
+ return SlotIndex(back(), 0);
+ }
+
/// Returns the invalid index marker for this analysis.
SlotIndex getInvalidIndex() {
return getZeroIndex();
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 3aaab8861224..d8f037385957 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -105,7 +105,6 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
const MCSection *UStringSection;
const MCSection *TextCoalSection;
const MCSection *ConstTextCoalSection;
- const MCSection *ConstDataCoalSection;
const MCSection *ConstDataSection;
const MCSection *DataCoalSection;
const MCSection *DataCommonSection;
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 6e2a102274a8..51f324c959c0 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -159,14 +159,12 @@ namespace llvm {
/// getPow2VectorType - Widens the length of the given vector EVT up to
/// the nearest power of 2 and returns that type.
MVT getPow2VectorType() const {
- if (!isPow2VectorType()) {
- unsigned NElts = getVectorNumElements();
- unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
- return MVT::getVectorVT(getVectorElementType(), Pow2NElts);
- }
- else {
+ if (isPow2VectorType())
return *this;
- }
+
+ unsigned NElts = getVectorNumElements();
+ unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts);
+ return MVT::getVectorVT(getVectorElementType(), Pow2NElts);
}
/// getScalarType - If this is a vector type, return the element type,
@@ -350,17 +348,6 @@ namespace llvm {
}
return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
-
- static MVT getIntVectorWithNumElements(unsigned NumElts) {
- switch (NumElts) {
- default: return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
- case 1: return MVT::v1i64;
- case 2: return MVT::v2i32;
- case 4: return MVT::v4i16;
- case 8: return MVT::v8i8;
- case 16: return MVT::v16i8;
- }
- }
};
struct EVT { // EVT = Extended Value Type
@@ -374,22 +361,16 @@ namespace llvm {
EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(0) { }
EVT(MVT S) : V(S), LLVMTy(0) {}
- bool operator==(const EVT VT) const {
- if (V.SimpleTy == VT.V.SimpleTy) {
- if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
- return LLVMTy == VT.LLVMTy;
+ bool operator==(EVT VT) const {
+ return !(*this != VT);
+ }
+ bool operator!=(EVT VT) const {
+ if (V.SimpleTy != VT.V.SimpleTy)
return true;
- }
+ if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
+ return LLVMTy != VT.LLVMTy;
return false;
}
- bool operator!=(const EVT VT) const {
- if (V.SimpleTy == VT.V.SimpleTy) {
- if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
- return LLVMTy != VT.LLVMTy;
- return false;
- }
- return true;
- }
/// getFloatingPointVT - Returns the EVT that represents a floating point
/// type with the given number of bits. There are two floating point types
@@ -402,30 +383,32 @@ namespace llvm {
/// number of bits.
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) {
MVT M = MVT::getIntegerVT(BitWidth);
- if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
- return getExtendedIntegerVT(Context, BitWidth);
- else
+ if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
+ return getExtendedIntegerVT(Context, BitWidth);
}
/// getVectorVT - Returns the EVT that represents a vector NumElements in
/// length, where each element is of type VT.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) {
MVT M = MVT::getVectorVT(VT.V, NumElements);
- if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
- return getExtendedVectorVT(Context, VT, NumElements);
- else
+ if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE)
return M;
+ return getExtendedVectorVT(Context, VT, NumElements);
}
/// getIntVectorWithNumElements - Return any integer vector type that has
/// the specified number of elements.
static EVT getIntVectorWithNumElements(LLVMContext &C, unsigned NumElts) {
- MVT M = MVT::getIntVectorWithNumElements(NumElts);
- if (M.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
- return getVectorVT(C, MVT::i8, NumElts);
- else
- return M;
+ switch (NumElts) {
+ default: return getVectorVT(C, MVT::i8, NumElts);
+ case 1: return MVT::v1i64;
+ case 2: return MVT::v2i32;
+ case 4: return MVT::v4i16;
+ case 8: return MVT::v8i8;
+ case 16: return MVT::v16i8;
+ }
+ return MVT::INVALID_SIMPLE_VALUE_TYPE;
}
/// isSimple - Test if the given EVT is simple (as opposed to being
@@ -457,26 +440,27 @@ namespace llvm {
/// is64BitVector - Return true if this is a 64-bit vector type.
bool is64BitVector() const {
- return isSimple() ?
- (V==MVT::v8i8 || V==MVT::v4i16 || V==MVT::v2i32 ||
- V==MVT::v1i64 || V==MVT::v2f32) :
- isExtended64BitVector();
+ if (!isSimple())
+ return isExtended64BitVector();
+
+ return (V == MVT::v8i8 || V==MVT::v4i16 || V==MVT::v2i32 ||
+ V == MVT::v1i64 || V==MVT::v2f32);
}
/// is128BitVector - Return true if this is a 128-bit vector type.
bool is128BitVector() const {
- return isSimple() ?
- (V==MVT::v16i8 || V==MVT::v8i16 || V==MVT::v4i32 ||
- V==MVT::v2i64 || V==MVT::v4f32 || V==MVT::v2f64) :
- isExtended128BitVector();
+ if (!isSimple())
+ return isExtended128BitVector();
+ return (V==MVT::v16i8 || V==MVT::v8i16 || V==MVT::v4i32 ||
+ V==MVT::v2i64 || V==MVT::v4f32 || V==MVT::v2f64);
}
/// is256BitVector - Return true if this is a 256-bit vector type.
inline bool is256BitVector() const {
- return isSimple()
- ? (V==MVT::v8f32 || V==MVT::v4f64 || V==MVT::v32i8 ||
- V==MVT::v16i16 || V==MVT::v8i32 || V==MVT::v4i64)
- : isExtended256BitVector();
+ if (!isSimple())
+ return isExtended256BitVector();
+ return (V == MVT::v8f32 || V == MVT::v4f64 || V == MVT::v32i8 ||
+ V == MVT::v16i16 || V == MVT::v8i32 || V == MVT::v4i64);
}
/// is512BitVector - Return true if this is a 512-bit vector type.
@@ -550,8 +534,7 @@ namespace llvm {
assert(isVector() && "Invalid vector type!");
if (isSimple())
return V.getVectorElementType();
- else
- return getExtendedVectorElementType();
+ return getExtendedVectorElementType();
}
/// getVectorNumElements - Given a vector type, return the number of
@@ -560,16 +543,14 @@ namespace llvm {
assert(isVector() && "Invalid vector type!");
if (isSimple())
return V.getVectorNumElements();
- else
- return getExtendedVectorNumElements();
+ return getExtendedVectorNumElements();
}
/// getSizeInBits - Return the size of the specified value type in bits.
unsigned getSizeInBits() const {
if (isSimple())
return V.getSizeInBits();
- else
- return getExtendedSizeInBits();
+ return getExtendedSizeInBits();
}
/// getStoreSize - Return the number of bytes overwritten by a store
@@ -592,8 +573,7 @@ namespace llvm {
unsigned BitWidth = getSizeInBits();
if (BitWidth <= 8)
return EVT(MVT::i8);
- else
- return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth));
+ return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth));
}
/// getHalfSizedIntegerVT - Finds the smallest simple value type that is
@@ -604,12 +584,10 @@ namespace llvm {
assert(isInteger() && !isVector() && "Invalid integer type!");
unsigned EVTSize = getSizeInBits();
for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE;
- IntVT <= MVT::LAST_INTEGER_VALUETYPE;
- ++IntVT) {
+ IntVT <= MVT::LAST_INTEGER_VALUETYPE; ++IntVT) {
EVT HalfVT = EVT((MVT::SimpleValueType)IntVT);
- if(HalfVT.getSizeInBits() * 2 >= EVTSize) {
+ if (HalfVT.getSizeInBits() * 2 >= EVTSize)
return HalfVT;
- }
}
return getIntegerVT(Context, (EVTSize + 1) / 2);
}
diff --git a/include/llvm/CompilerDriver/Action.h b/include/llvm/CompilerDriver/Action.h
index 70141393ce1f..f2b79655f60f 100644
--- a/include/llvm/CompilerDriver/Action.h
+++ b/include/llvm/CompilerDriver/Action.h
@@ -34,12 +34,16 @@ namespace llvmc {
std::string OutFile_;
public:
- Action (const std::string& C, const StrVector& A,
- bool S, const std::string& O)
- : Command_(C), Args_(A), StopCompilation_(S), OutFile_(O)
- {}
-
- /// Execute - Executes the represented action.
+ void Construct (const std::string& C, const StrVector& A,
+ bool S, const std::string& O) {
+ Command_ = C;
+ Args_ = A;
+ StopCompilation_ = S;
+ OutFile_ = O;
+ }
+ bool IsConstructed () { return (Command_.size() != 0);}
+
+ /// Execute - Executes the command. Returns -1 on error.
int Execute () const;
bool StopCompilation () const { return StopCompilation_; }
const std::string& OutFile() { return OutFile_; }
diff --git a/include/llvm/CompilerDriver/AutoGenerated.h b/include/llvm/CompilerDriver/AutoGenerated.h
new file mode 100644
index 000000000000..7b926c622c90
--- /dev/null
+++ b/include/llvm/CompilerDriver/AutoGenerated.h
@@ -0,0 +1,40 @@
+//===--- AutoGenerated.h - The LLVM Compiler Driver -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to the autogenerated driver code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_AUTOGENERATED_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_AUTOGENERATED_H
+
+namespace llvmc {
+ class LanguageMap;
+ class CompilationGraph;
+
+ namespace autogenerated {
+
+ int PreprocessOptions();
+ int PopulateLanguageMap(LanguageMap& langMap);
+ int PopulateCompilationGraph(CompilationGraph& graph);
+
+ inline int RunInitialization (LanguageMap& M, CompilationGraph& G) {
+ if (int ret = PreprocessOptions())
+ return ret;
+ if (int ret = PopulateLanguageMap(M))
+ return ret;
+ if (int ret = PopulateCompilationGraph(G))
+ return ret;
+
+ return 0;
+ }
+ }
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_AUTOGENERATED_H
diff --git a/include/llvm/CompilerDriver/BuiltinOptions.h b/include/llvm/CompilerDriver/BuiltinOptions.h
index 0c1bbe2582d0..7b9c15c52f7f 100644
--- a/include/llvm/CompilerDriver/BuiltinOptions.h
+++ b/include/llvm/CompilerDriver/BuiltinOptions.h
@@ -18,6 +18,8 @@
#include <string>
+namespace llvmc {
+
namespace SaveTempsEnum { enum Values { Cwd, Obj, Unset }; }
extern llvm::cl::list<std::string> InputFilenames;
@@ -32,4 +34,6 @@ extern llvm::cl::opt<bool> ViewGraph;
extern llvm::cl::opt<bool> WriteGraph;
extern llvm::cl::opt<SaveTempsEnum::Values> SaveTemps;
+} // End namespace llvmc.
+
#endif // LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
diff --git a/include/llvm/CompilerDriver/Common.td b/include/llvm/CompilerDriver/Common.td
index 31a627d6273a..84e8783d20aa 100644
--- a/include/llvm/CompilerDriver/Common.td
+++ b/include/llvm/CompilerDriver/Common.td
@@ -32,6 +32,7 @@ def actions;
def alias_option;
def switch_option;
+def switch_list_option;
def parameter_option;
def parameter_list_option;
def prefix_option;
@@ -39,7 +40,6 @@ def prefix_list_option;
// Possible option properties.
-def extern;
def help;
def hidden;
def init;
@@ -93,17 +93,8 @@ def error;
def set_option;
def unset_option;
-// Increase/decrease the edge weight.
+// Increase the edge weight.
def inc_weight;
-def dec_weight;
-
-// Empty DAG marker.
-def empty_dag_marker;
-
-// Used to specify plugin priority.
-class PluginPriority<int p> {
- int priority = p;
-}
// Option list - a single place to specify options.
class OptionList<list<dag> l> {
@@ -117,31 +108,17 @@ class OptionPreprocessor<dag d> {
// Map from suffixes to language names
-class LangToSuffixes<string str, list<string> lst> {
- string lang = str;
- list<string> suffixes = lst;
-}
+def lang_to_suffixes;
-class LanguageMap<list<LangToSuffixes> lst> {
- list<LangToSuffixes> map = lst;
+class LanguageMap<list<dag> l> {
+ list<dag> map = l;
}
// Compilation graph
-class EdgeBase<string t1, string t2, dag d> {
- string a = t1;
- string b = t2;
- dag weight = d;
-}
-
-class Edge<string t1, string t2> : EdgeBase<t1, t2, (empty_dag_marker)>;
-
-// Edge and SimpleEdge are synonyms.
-class SimpleEdge<string t1, string t2> : EdgeBase<t1, t2, (empty_dag_marker)>;
-
-// Optionally enabled edge.
-class OptionalEdge<string t1, string t2, dag props> : EdgeBase<t1, t2, props>;
+def edge;
+def optional_edge;
-class CompilationGraph<list<EdgeBase> lst> {
- list<EdgeBase> edges = lst;
+class CompilationGraph<list<dag> l> {
+ list<dag> edges = l;
}
diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h
index ba6ff4714ce4..619c904f15d6 100644
--- a/include/llvm/CompilerDriver/CompilationGraph.h
+++ b/include/llvm/CompilerDriver/CompilationGraph.h
@@ -36,7 +36,7 @@ namespace llvmc {
public:
/// GetLanguage - Find the language name corresponding to a given file.
- const std::string& GetLanguage(const llvm::sys::Path&) const;
+ const std::string* GetLanguage(const llvm::sys::Path&) const;
};
/// Edge - Represents an edge of the compilation graph.
@@ -46,7 +46,7 @@ namespace llvmc {
virtual ~Edge() {}
const std::string& ToolName() const { return ToolName_; }
- virtual unsigned Weight(const InputLanguagesSet& InLangs) const = 0;
+ virtual int Weight(const InputLanguagesSet& InLangs) const = 0;
private:
std::string ToolName_;
};
@@ -55,7 +55,7 @@ namespace llvmc {
class SimpleEdge : public Edge {
public:
SimpleEdge(const std::string& T) : Edge(T) {}
- unsigned Weight(const InputLanguagesSet&) const { return 1; }
+ int Weight(const InputLanguagesSet&) const { return 1; }
};
/// Node - A node (vertex) of the compilation graph.
@@ -132,32 +132,32 @@ namespace llvmc {
void insertNode(Tool* T);
/// insertEdge - Insert a new edge into the graph. Takes ownership
- /// of the Edge object.
- void insertEdge(const std::string& A, Edge* E);
+ /// of the Edge object. Returns non-zero value on error.
+ int insertEdge(const std::string& A, Edge* E);
- /// Build - Build target(s) from the input file set. Command-line
- /// options are passed implicitly as global variables.
+ /// Build - Build target(s) from the input file set. Command-line options
+ /// are passed implicitly as global variables. Returns non-zero value on
+ /// error (usually the failed program's exit code).
int Build(llvm::sys::Path const& TempDir, const LanguageMap& LangMap);
- /// Check - Check the compilation graph for common errors like
- /// cycles, input/output language mismatch and multiple default
- /// edges. Prints error messages and in case it finds any errors.
+ /// Check - Check the compilation graph for common errors like cycles,
+ /// input/output language mismatch and multiple default edges. Prints error
+ /// messages and in case it finds any errors.
int Check();
- /// getNode - Return a reference to the node correponding to the
- /// given tool name. Throws std::runtime_error.
- Node& getNode(const std::string& ToolName);
- const Node& getNode(const std::string& ToolName) const;
+ /// getNode - Return a reference to the node corresponding to the given tool
+ /// name. Returns 0 on error.
+ Node* getNode(const std::string& ToolName);
+ const Node* getNode(const std::string& ToolName) const;
- /// viewGraph - This function is meant for use from the debugger.
- /// You can just say 'call G->viewGraph()' and a ghostview window
- /// should pop up from the program, displaying the compilation
- /// graph. This depends on there being a 'dot' and 'gv' program
- /// in your path.
+ /// viewGraph - This function is meant for use from the debugger. You can
+ /// just say 'call G->viewGraph()' and a ghostview window should pop up from
+ /// the program, displaying the compilation graph. This depends on there
+ /// being a 'dot' and 'gv' program in your path.
void viewGraph();
/// writeGraph - Write Graphviz .dot source file to the current direcotry.
- void writeGraph(const std::string& OutputFilename);
+ int writeGraph(const std::string& OutputFilename);
// GraphTraits support.
friend NodesIterator GraphBegin(CompilationGraph*);
@@ -167,16 +167,15 @@ namespace llvmc {
// Helper functions.
/// getToolsVector - Return a reference to the list of tool names
- /// corresponding to the given language name. Throws
- /// std::runtime_error.
- const tools_vector_type& getToolsVector(const std::string& LangName) const;
+ /// corresponding to the given language name. Returns 0 on error.
+ const tools_vector_type* getToolsVector(const std::string& LangName) const;
- /// PassThroughGraph - Pass the input file through the toolchain
- /// starting at StartNode.
- void PassThroughGraph (const llvm::sys::Path& In, const Node* StartNode,
- const InputLanguagesSet& InLangs,
- const llvm::sys::Path& TempDir,
- const LanguageMap& LangMap) const;
+ /// PassThroughGraph - Pass the input file through the toolchain starting at
+ /// StartNode.
+ int PassThroughGraph (const llvm::sys::Path& In, const Node* StartNode,
+ const InputLanguagesSet& InLangs,
+ const llvm::sys::Path& TempDir,
+ const LanguageMap& LangMap) const;
/// FindToolChain - Find head of the toolchain corresponding to
/// the given file.
@@ -185,26 +184,32 @@ namespace llvmc {
InputLanguagesSet& InLangs,
const LanguageMap& LangMap) const;
- /// BuildInitial - Traverse the initial parts of the toolchains.
- void BuildInitial(InputLanguagesSet& InLangs,
- const llvm::sys::Path& TempDir,
- const LanguageMap& LangMap);
+ /// BuildInitial - Traverse the initial parts of the toolchains. Returns
+ /// non-zero value on error.
+ int BuildInitial(InputLanguagesSet& InLangs,
+ const llvm::sys::Path& TempDir,
+ const LanguageMap& LangMap);
- /// TopologicalSort - Sort the nodes in topological order.
- void TopologicalSort(std::vector<const Node*>& Out);
- /// TopologicalSortFilterJoinNodes - Call TopologicalSort and
- /// filter the resulting list to include only Join nodes.
- void TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out);
+ /// TopologicalSort - Sort the nodes in topological order. Returns non-zero
+ /// value on error.
+ int TopologicalSort(std::vector<const Node*>& Out);
+ /// TopologicalSortFilterJoinNodes - Call TopologicalSort and filter the
+ /// resulting list to include only Join nodes. Returns non-zero value on
+ /// error.
+ int TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out);
// Functions used to implement Check().
- /// CheckLanguageNames - Check that output/input language names
- /// match for all nodes.
+ /// CheckLanguageNames - Check that output/input language names match for
+ /// all nodes. Returns non-zero value on error (number of errors
+ /// encountered).
int CheckLanguageNames() const;
- /// CheckMultipleDefaultEdges - check that there are no multiple
- /// default default edges.
+ /// CheckMultipleDefaultEdges - check that there are no multiple default
+ /// default edges. Returns non-zero value on error (number of errors
+ /// encountered).
int CheckMultipleDefaultEdges() const;
- /// CheckCycles - Check that there are no cycles in the graph.
+ /// CheckCycles - Check that there are no cycles in the graph. Returns
+ /// non-zero value on error (number of errors encountered).
int CheckCycles();
};
@@ -270,7 +275,7 @@ namespace llvmc {
}
inline pointer operator*() const {
- return &OwningGraph->getNode((*EdgeIter)->ToolName());
+ return OwningGraph->getNode((*EdgeIter)->ToolName());
}
inline pointer operator->() const {
return this->operator*();
@@ -301,7 +306,7 @@ namespace llvm {
typedef llvmc::NodeChildIterator ChildIteratorType;
static NodeType* getEntryNode(GraphType* G) {
- return &G->getNode("root");
+ return G->getNode("root");
}
static ChildIteratorType child_begin(NodeType* N) {
diff --git a/include/llvm/CompilerDriver/Error.h b/include/llvm/CompilerDriver/Error.h
index fa678cfbfb92..013094e5dd79 100644
--- a/include/llvm/CompilerDriver/Error.h
+++ b/include/llvm/CompilerDriver/Error.h
@@ -7,28 +7,22 @@
//
//===----------------------------------------------------------------------===//
//
-// Exception classes for llvmc.
+// Error handling.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_INCLUDE_COMPILER_DRIVER_ERROR_H
#define LLVM_INCLUDE_COMPILER_DRIVER_ERROR_H
-#include <stdexcept>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvmc {
- /// error_code - This gets thrown during the compilation process if a tool
- /// invocation returns a non-zero exit code.
- class error_code: public std::runtime_error {
- int Code_;
- public:
- error_code (int c)
- : std::runtime_error("Tool returned error code"), Code_(c)
- {}
-
- int code() const { return Code_; }
- };
+ inline void PrintError(llvm::StringRef Err) {
+ extern const char* ProgramName;
+ llvm::errs() << ProgramName << ": " << Err << '\n';
+ }
}
diff --git a/include/llvm/CompilerDriver/ForceLinkage.h b/include/llvm/CompilerDriver/ForceLinkage.h
deleted file mode 100644
index 830c04e2d307..000000000000
--- a/include/llvm/CompilerDriver/ForceLinkage.h
+++ /dev/null
@@ -1,122 +0,0 @@
-//===--- ForceLinkage.h - The LLVM Compiler Driver --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// A bit of preprocessor magic to force references to static libraries. Needed
-// because plugin initialization is done via static variables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_H
-#define LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_H
-
-#include "llvm/CompilerDriver/ForceLinkageMacros.h"
-
-namespace llvmc {
-
-// Declare all ForceLinkage$(PluginName) functions.
-
-#ifdef LLVMC_BUILTIN_PLUGIN_1
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_1);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_2
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_2);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_3
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_3);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_4
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_4);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_5
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_5);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_6
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_6);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_7
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_7);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_8
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_8);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_9
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_9);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_10
- LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_10);
-#endif
-
-namespace force_linkage {
-
- struct LinkageForcer {
-
- LinkageForcer() {
-
-// Call all ForceLinkage$(PluginName) functions.
-#ifdef LLVMC_BUILTIN_PLUGIN_1
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_1);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_2
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_2);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_3
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_3);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_4
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_4);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_5
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_5);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_6
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_6);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_7
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_7);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_8
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_8);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_9
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_9);
-#endif
-
-#ifdef LLVMC_BUILTIN_PLUGIN_10
- LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_10);
-#endif
-
- }
- };
-} // End namespace force_linkage.
-
-// The only externally used bit.
-void ForceLinkage() {
- force_linkage::LinkageForcer dummy;
-}
-
-} // End namespace llvmc.
-
-#endif // LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_H
diff --git a/include/llvm/CompilerDriver/ForceLinkageMacros.h b/include/llvm/CompilerDriver/ForceLinkageMacros.h
deleted file mode 100644
index 8862b0082344..000000000000
--- a/include/llvm/CompilerDriver/ForceLinkageMacros.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===--- ForceLinkageMacros.h - The LLVM Compiler Driver --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Preprocessor magic that forces references to static libraries - common
-// macros used by both driver and plugins.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_MACROS_H
-#define LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_MACROS_H
-
-#define LLVMC_FORCE_LINKAGE_PREFIX(PluginName) ForceLinkage ## PluginName
-
-#define LLVMC_FORCE_LINKAGE_FUN(PluginName) \
- LLVMC_FORCE_LINKAGE_PREFIX(PluginName)
-
-#define LLVMC_FORCE_LINKAGE_DECL(PluginName) \
- void LLVMC_FORCE_LINKAGE_FUN(PluginName) ()
-
-#define LLVMC_FORCE_LINKAGE_CALL(PluginName) \
- LLVMC_FORCE_LINKAGE_FUN(PluginName) ()
-
-#endif // LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_MACROS_H
diff --git a/include/llvm/CompilerDriver/Main.h b/include/llvm/CompilerDriver/Main.h
new file mode 100644
index 000000000000..d136a5d2fa19
--- /dev/null
+++ b/include/llvm/CompilerDriver/Main.h
@@ -0,0 +1,21 @@
+//===--- Main.h - The LLVM Compiler Driver ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Entry point for the driver executable.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_MAIN_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_MAIN_H
+
+namespace llvmc {
+ int Main(int argc, char** argv);
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_MAIN_H
diff --git a/include/llvm/CompilerDriver/Main.inc b/include/llvm/CompilerDriver/Main.inc
index 71bb8cb3bf73..41640437de89 100644
--- a/include/llvm/CompilerDriver/Main.inc
+++ b/include/llvm/CompilerDriver/Main.inc
@@ -7,26 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This tool provides a single point of access to the LLVM
-// compilation tools. It has many options. To discover the options
-// supported please refer to the tools' manual page or run the tool
-// with the -help option.
-//
-// This file provides the default entry point for the driver executable.
+// Default main() for the driver executable.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_INCLUDE_COMPILER_DRIVER_MAIN_INC
#define LLVM_INCLUDE_COMPILER_DRIVER_MAIN_INC
-#include "llvm/CompilerDriver/ForceLinkage.h"
-
-namespace llvmc {
- int Main(int argc, char** argv);
-}
+#include "llvm/CompilerDriver/Main.h"
int main(int argc, char** argv) {
- llvmc::ForceLinkage();
return llvmc::Main(argc, argv);
}
diff --git a/include/llvm/CompilerDriver/Plugin.h b/include/llvm/CompilerDriver/Plugin.h
deleted file mode 100644
index e9a20488a081..000000000000
--- a/include/llvm/CompilerDriver/Plugin.h
+++ /dev/null
@@ -1,81 +0,0 @@
-//===--- Plugin.h - The LLVM Compiler Driver --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Plugin support for llvmc.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_INCLUDE_COMPILER_DRIVER_PLUGIN_H
-#define LLVM_INCLUDE_COMPILER_DRIVER_PLUGIN_H
-
-#include "llvm/Support/Registry.h"
-
-namespace llvmc {
-
- class LanguageMap;
- class CompilationGraph;
-
- /// BasePlugin - An abstract base class for all LLVMC plugins.
- struct BasePlugin {
-
- /// Priority - Plugin priority, useful for handling dependencies
- /// between plugins. Plugins with lower priorities are loaded
- /// first.
- virtual int Priority() const { return 0; }
-
- /// PreprocessOptions - The auto-generated function that performs various
- /// consistency checks on options (like ensuring that -O2 and -O3 are not
- /// used together).
- virtual void PreprocessOptions() const = 0;
-
- /// PopulateLanguageMap - The auto-generated function that fills in
- /// the language map (map from file extensions to language names).
- virtual void PopulateLanguageMap(LanguageMap&) const = 0;
-
- /// PopulateCompilationGraph - The auto-generated function that
- /// populates the compilation graph with nodes and edges.
- virtual void PopulateCompilationGraph(CompilationGraph&) const = 0;
-
- /// Needed to avoid a compiler warning.
- virtual ~BasePlugin() {}
- };
-
- typedef llvm::Registry<BasePlugin> PluginRegistry;
-
- template <class P>
- struct RegisterPlugin
- : public PluginRegistry::Add<P> {
- typedef PluginRegistry::Add<P> Base;
-
- RegisterPlugin(const char* Name = "Nameless",
- const char* Desc = "Auto-generated plugin")
- : Base(Name, Desc) {}
- };
-
-
- /// PluginLoader - Helper class used by the main program for
- /// lifetime management.
- struct PluginLoader {
- PluginLoader();
- ~PluginLoader();
-
- /// RunInitialization - Calls PreprocessOptions, PopulateLanguageMap and
- /// PopulateCompilationGraph methods of all plugins. This populates the
- /// global language map and the compilation graph.
- void RunInitialization(LanguageMap& langMap, CompilationGraph& graph) const;
-
- private:
- // noncopyable
- PluginLoader(const PluginLoader& other);
- const PluginLoader& operator=(const PluginLoader& other);
- };
-
-}
-
-#endif // LLVM_INCLUDE_COMPILER_DRIVER_PLUGIN_H
diff --git a/include/llvm/CompilerDriver/Tool.h b/include/llvm/CompilerDriver/Tool.h
index 85d1690bcfed..45ef50d0b5af 100644
--- a/include/llvm/CompilerDriver/Tool.h
+++ b/include/llvm/CompilerDriver/Tool.h
@@ -38,17 +38,23 @@ namespace llvmc {
virtual ~Tool() {}
- virtual Action GenerateAction (const PathVector& inFiles,
- bool HasChildren,
- const llvm::sys::Path& TempDir,
- const InputLanguagesSet& InLangs,
- const LanguageMap& LangMap) const = 0;
-
- virtual Action GenerateAction (const llvm::sys::Path& inFile,
- bool HasChildren,
- const llvm::sys::Path& TempDir,
- const InputLanguagesSet& InLangs,
- const LanguageMap& LangMap) const = 0;
+ /// GenerateAction - Generate an Action given particular command-line
+ /// options. Returns non-zero value on error.
+ virtual int GenerateAction (Action& Out,
+ const PathVector& inFiles,
+ const bool HasChildren,
+ const llvm::sys::Path& TempDir,
+ const InputLanguagesSet& InLangs,
+ const LanguageMap& LangMap) const = 0;
+
+ /// GenerateAction - Generate an Action given particular command-line
+ /// options. Returns non-zero value on error.
+ virtual int GenerateAction (Action& Out,
+ const llvm::sys::Path& inFile,
+ const bool HasChildren,
+ const llvm::sys::Path& TempDir,
+ const InputLanguagesSet& InLangs,
+ const LanguageMap& LangMap) const = 0;
virtual const char* Name() const = 0;
virtual const char** InputLanguages() const = 0;
@@ -74,11 +80,13 @@ namespace llvmc {
void ClearJoinList() { JoinList_.clear(); }
bool JoinListEmpty() const { return JoinList_.empty(); }
- Action GenerateAction(bool HasChildren,
- const llvm::sys::Path& TempDir,
- const InputLanguagesSet& InLangs,
- const LanguageMap& LangMap) const {
- return GenerateAction(JoinList_, HasChildren, TempDir, InLangs, LangMap);
+ int GenerateAction(Action& Out,
+ const bool HasChildren,
+ const llvm::sys::Path& TempDir,
+ const InputLanguagesSet& InLangs,
+ const LanguageMap& LangMap) const {
+ return GenerateAction(Out, JoinList_, HasChildren, TempDir, InLangs,
+ LangMap);
}
// We shouldn't shadow base class's version of GenerateAction.
using Tool::GenerateAction;
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index e7594babaa6b..e8feabffdaf7 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -3,6 +3,9 @@
** Created by Kevin from config.h.in **
***************************************/
+#ifndef CONFIG_H
+#define CONFIG_H
+
/* Define if dlopen(0) will open the symbols of the program */
#undef CAN_DLOPEN_SELF
@@ -525,7 +528,7 @@
#cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
/* Installation prefix directory */
-#undef LLVM_PREFIX
+#cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
/* Define if the OS needs help to load dependent libraries for dlopen(). */
#cmakedefine LTDL_DLOPEN_DEPLIBS ${LTDL_DLOPEN_DEPLIBS}
@@ -623,5 +626,16 @@
/* Define to a function implementing strdup */
#cmakedefine strdup ${strdup}
-/* Native LLVM architecture */
-#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}Target
+/* LLVM architecture name for the native architecture, if available */
+#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
+
+/* LLVM name for the native Target init function, if available */
+#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
+
+/* LLVM name for the native TargetInfo init function, if available */
+#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+
+#endif
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index d12f82a9012c..d62da1ab0377 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -1,5 +1,8 @@
/* include/llvm/Config/config.h.in. Generated from autoconf/configure.ac by autoheader. */
+#ifndef CONFIG_H
+#define CONFIG_H
+
/* 32 bit multilib directory. */
#undef CXX_INCLUDE_32BIT_DIR
@@ -458,6 +461,9 @@
/* Define to 1 if you have the `__dso_handle' function. */
#undef HAVE___DSO_HANDLE
+/* Linker version detected at compile time. */
+#undef HOST_LINK_VERSION
+
/* Installation directory for binary executables */
#undef LLVM_BINDIR
@@ -494,6 +500,15 @@
/* LLVM architecture name for the native architecture, if available */
#undef LLVM_NATIVE_ARCH
+/* LLVM name for the native AsmPrinter init function, if available */
+#undef LLVM_NATIVE_ASMPRINTER
+
+/* LLVM name for the native Target init function, if available */
+#undef LLVM_NATIVE_TARGET
+
+/* LLVM name for the native TargetInfo init function, if available */
+#undef LLVM_NATIVE_TARGETINFO
+
/* Define if this is Unixish platform */
#undef LLVM_ON_UNIX
@@ -598,3 +613,5 @@
/* Define to `unsigned int' if <sys/types.h> does not define. */
#undef size_t
+
+#endif
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
new file mode 100644
index 000000000000..8469bcc60674
--- /dev/null
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -0,0 +1,97 @@
+/*===-- llvm/config/llvm-config.h - llvm configure variable -------*- C -*-===*/
+/* */
+/* The LLVM Compiler Infrastructure */
+/* */
+/* This file is distributed under the University of Illinois Open Source */
+/* License. See LICENSE.TXT for details. */
+/* */
+/*===----------------------------------------------------------------------===*/
+
+/* This file enumerates all of the llvm variables from configure so that
+ they can be in exported headers and won't override package specific
+ directives. This is a C file so we can include it in the llvm-c headers. */
+
+/* To avoid multiple inclusions of these variables when we include the exported
+ headers and config.h, conditionally include these. */
+/* TODO: This is a bit of a hack. */
+#ifndef CONFIG_H
+
+/* Installation directory for binary executables */
+#cmakedefine LLVM_BINDIR "${LLVM_BINDIR}"
+
+/* Time at which LLVM was configured */
+#cmakedefine LLVM_CONFIGTIME "${LLVM_CONFIGTIME}"
+
+/* Installation directory for data files */
+#cmakedefine LLVM_DATADIR "${LLVM_DATADIR}"
+
+/* Installation directory for documentation */
+#cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}"
+
+/* Installation directory for config files */
+#cmakedefine LLVM_ETCDIR "${LLVM_ETCDIR}"
+
+/* Host triple we were built on */
+#cmakedefine LLVM_HOSTTRIPLE "${LLVM_HOSTTRIPLE}"
+
+/* Installation directory for include files */
+#cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
+
+/* Installation directory for .info files */
+#cmakedefine LLVM_INFODIR "${LLVM_INFODIR}"
+
+/* Installation directory for libraries */
+#cmakedefine LLVM_LIBDIR "${LLVM_LIBDIR}"
+
+/* Installation directory for man pages */
+#cmakedefine LLVM_MANDIR "${LLVM_MANDIR}"
+
+/* Build multithreading support into LLVM */
+#cmakedefine LLVM_MULTITHREADED
+
+/* LLVM architecture name for the native architecture, if available */
+#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
+
+/* LLVM name for the native Target init function, if available */
+#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
+
+/* LLVM name for the native TargetInfo init function, if available */
+#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+
+/* Define if this is Unixish platform */
+#cmakedefine LLVM_ON_UNIX
+
+/* Define if this is Win32ish platform */
+#cmakedefine LLVM_ON_WIN32
+
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#cmakedefine LLVM_PATH_CIRCO "${LLVM_PATH_CIRCO}"
+
+/* Define to path to dot program if found or 'echo dot' otherwise */
+#cmakedefine LLVM_PATH_DOT "${LLVM_PATH_DOT}"
+
+/* Define to path to dotty program if found or 'echo dotty' otherwise */
+#cmakedefine LLVM_PATH_DOTTY "${LLVM_PATH_DOTTY}"
+
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#cmakedefine LLVM_PATH_FDP "${LLVM_PATH_FDP}"
+
+/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
+#cmakedefine LLVM_PATH_GRAPHVIZ "${LLVM_PATH_GRAPHVIZ}"
+
+/* Define to path to gv program if found or 'echo gv' otherwise */
+#cmakedefine LLVM_PATH_GV "${LLVM_PATH_GV}"
+
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#cmakedefine LLVM_PATH_NEATO "${LLVM_PATH_NEATO}"
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
+
+/* Installation prefix directory */
+#cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
+
+#endif
diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in
new file mode 100644
index 000000000000..e7a04ee91bb9
--- /dev/null
+++ b/include/llvm/Config/llvm-config.h.in
@@ -0,0 +1,97 @@
+/*===-- llvm/config/llvm-config.h - llvm configure variable -------*- C -*-===*/
+/* */
+/* The LLVM Compiler Infrastructure */
+/* */
+/* This file is distributed under the University of Illinois Open Source */
+/* License. See LICENSE.TXT for details. */
+/* */
+/*===----------------------------------------------------------------------===*/
+
+/* This file enumerates all of the llvm variables from configure so that
+ they can be in exported headers and won't override package specific
+ directives. This is a C file so we can include it in the llvm-c headers. */
+
+/* To avoid multiple inclusions of these variables when we include the exported
+ headers and config.h, conditionally include these. */
+/* TODO: This is a bit of a hack. */
+#ifndef CONFIG_H
+
+/* Installation directory for binary executables */
+#undef LLVM_BINDIR
+
+/* Time at which LLVM was configured */
+#undef LLVM_CONFIGTIME
+
+/* Installation directory for data files */
+#undef LLVM_DATADIR
+
+/* Installation directory for documentation */
+#undef LLVM_DOCSDIR
+
+/* Installation directory for config files */
+#undef LLVM_ETCDIR
+
+/* Host triple we were built on */
+#undef LLVM_HOSTTRIPLE
+
+/* Installation directory for include files */
+#undef LLVM_INCLUDEDIR
+
+/* Installation directory for .info files */
+#undef LLVM_INFODIR
+
+/* Installation directory for libraries */
+#undef LLVM_LIBDIR
+
+/* Installation directory for man pages */
+#undef LLVM_MANDIR
+
+/* Build multithreading support into LLVM */
+#undef LLVM_MULTITHREADED
+
+/* LLVM architecture name for the native architecture, if available */
+#undef LLVM_NATIVE_ARCH
+
+/* LLVM name for the native Target init function, if available */
+#undef LLVM_NATIVE_TARGET
+
+/* LLVM name for the native TargetInfo init function, if available */
+#undef LLVM_NATIVE_TARGETINFO
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#undef LLVM_NATIVE_ASMPRINTER
+
+/* Define if this is Unixish platform */
+#undef LLVM_ON_UNIX
+
+/* Define if this is Win32ish platform */
+#undef LLVM_ON_WIN32
+
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#undef LLVM_PATH_CIRCO
+
+/* Define to path to dot program if found or 'echo dot' otherwise */
+#undef LLVM_PATH_DOT
+
+/* Define to path to dotty program if found or 'echo dotty' otherwise */
+#undef LLVM_PATH_DOTTY
+
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#undef LLVM_PATH_FDP
+
+/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
+#undef LLVM_PATH_GRAPHVIZ
+
+/* Define to path to gv program if found or 'echo gv' otherwise */
+#undef LLVM_PATH_GV
+
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#undef LLVM_PATH_NEATO
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#undef LLVM_PATH_TWOPI
+
+/* Installation prefix directory */
+#undef LLVM_PREFIX
+
+#endif
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 9ca845ec9d72..a7deae0451bc 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -33,7 +33,6 @@ namespace llvm {
class ArrayType;
class IntegerType;
class StructType;
-class UnionType;
class PointerType;
class VectorType;
@@ -459,49 +458,6 @@ struct OperandTraits<ConstantStruct> : public VariadicOperandTraits<> {
DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantStruct, Constant)
-//===----------------------------------------------------------------------===//
-// ConstantUnion - Constant Union Declarations
-//
-class ConstantUnion : public Constant {
- friend struct ConstantCreator<ConstantUnion, UnionType, Constant*>;
- ConstantUnion(const ConstantUnion &); // DO NOT IMPLEMENT
-protected:
- ConstantUnion(const UnionType *T, Constant* Val);
-public:
- // ConstantUnion accessors
- static Constant *get(const UnionType *T, Constant* V);
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
- /// getType() specialization - Reduce amount of casting...
- ///
- inline const UnionType *getType() const {
- return reinterpret_cast<const UnionType*>(Value::getType());
- }
-
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue. This always returns false because zero structs are always
- /// created as ConstantAggregateZero objects.
- virtual bool isNullValue() const {
- return false;
- }
-
- virtual void destroyConstant();
- virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const ConstantUnion *) { return true; }
- static bool classof(const Value *V) {
- return V->getValueID() == ConstantUnionVal;
- }
-};
-
-template <>
-struct OperandTraits<ConstantUnion> : public FixedNumOperandTraits<1> {
-};
-
-DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantUnion, Constant)
//===----------------------------------------------------------------------===//
/// ConstantVector - Constant Vector Declarations
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 912bb6d882b0..9b6b19f15466 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -27,7 +27,6 @@ template<class ValType, class TypeClass> class TypeMap;
class FunctionValType;
class ArrayValType;
class StructValType;
-class UnionValType;
class PointerValType;
class VectorValType;
class IntegerValType;
@@ -52,10 +51,6 @@ protected:
///
void dropAllTypeUses();
- /// unlockedRefineAbstractTypeTo - Internal version of refineAbstractTypeTo
- /// that performs no locking. Only used for internal recursion.
- void unlockedRefineAbstractTypeTo(const Type *NewType);
-
public:
//===--------------------------------------------------------------------===//
@@ -230,8 +225,7 @@ public:
return T->getTypeID() == ArrayTyID ||
T->getTypeID() == StructTyID ||
T->getTypeID() == PointerTyID ||
- T->getTypeID() == VectorTyID ||
- T->getTypeID() == UnionTyID;
+ T->getTypeID() == VectorTyID;
}
};
@@ -302,64 +296,6 @@ public:
bool isPacked() const { return (0 != getSubclassData()) ? true : false; }
};
-
-/// UnionType - Class to represent union types. A union type is similar to
-/// a structure, except that all member fields begin at offset 0.
-///
-class UnionType : public CompositeType {
- friend class TypeMap<UnionValType, UnionType>;
- UnionType(const UnionType &); // Do not implement
- const UnionType &operator=(const UnionType &); // Do not implement
- UnionType(LLVMContext &C, const Type* const* Types, unsigned NumTypes);
-public:
- /// UnionType::get - This static method is the primary way to create a
- /// UnionType.
- static UnionType *get(const Type* const* Types, unsigned NumTypes);
-
- /// UnionType::get - This static method is a convenience method for
- /// creating union types by specifying the elements as arguments.
- static UnionType *get(const Type *type, ...) END_WITH_NULL;
-
- /// isValidElementType - Return true if the specified type is valid as a
- /// element type.
- static bool isValidElementType(const Type *ElemTy);
-
- /// Given an element type, return the member index of that type, or -1
- /// if there is no such member type.
- int getElementTypeIndex(const Type *ElemTy) const;
-
- // Iterator access to the elements
- typedef Type::subtype_iterator element_iterator;
- element_iterator element_begin() const { return ContainedTys; }
- element_iterator element_end() const { return &ContainedTys[NumContainedTys];}
-
- // Random access to the elements
- unsigned getNumElements() const { return NumContainedTys; }
- const Type *getElementType(unsigned N) const {
- assert(N < NumContainedTys && "Element number out of range!");
- return ContainedTys[N];
- }
-
- /// getTypeAtIndex - Given an index value into the type, return the type of
- /// the element. For a union type, this must be a constant value...
- ///
- virtual const Type *getTypeAtIndex(const Value *V) const;
- virtual const Type *getTypeAtIndex(unsigned Idx) const;
- virtual bool indexValid(const Value *V) const;
- virtual bool indexValid(unsigned Idx) const;
-
- // Implement the AbstractTypeUser interface.
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const UnionType *) { return true; }
- static inline bool classof(const Type *T) {
- return T->getTypeID() == UnionTyID;
- }
-};
-
-
/// SequentialType - This is the superclass of the array, pointer and vector
/// type classes. All of these represent "arrays" in memory. The array type
/// represents a specifically sized array, pointer types are unsized/unknown
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index fd519203e3fe..e0159309ce5c 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -29,10 +29,9 @@ namespace llvm {
class JITMemoryManager {
protected:
bool HasGOT;
- bool SizeRequired;
public:
- JITMemoryManager() : HasGOT(false), SizeRequired(false) {}
+ JITMemoryManager() : HasGOT(false) {}
virtual ~JITMemoryManager();
/// CreateDefaultMemManager - This is used to create the default
@@ -71,12 +70,6 @@ public:
/// return a pointer to its base.
virtual uint8_t *getGOTBase() const = 0;
- /// NeedsExactSize - If the memory manager requires to know the size of the
- /// objects to be emitted
- bool NeedsExactSize() const {
- return SizeRequired;
- }
-
//===--------------------------------------------------------------------===//
// Main Allocation Functions
//===--------------------------------------------------------------------===//
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index d175080a6674..62e84f833510 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -41,6 +41,8 @@ public:
PrivateLinkage, ///< Like Internal, but omit from symbol table.
LinkerPrivateLinkage, ///< Like Private, but linker removes.
LinkerPrivateWeakLinkage, ///< Like LinkerPrivate, but weak.
+ LinkerPrivateWeakDefAutoLinkage, ///< Like LinkerPrivateWeak, but possibly
+ /// hidden.
DLLImportLinkage, ///< Function to be imported from DLL
DLLExportLinkage, ///< Function to be accessible from DLL.
ExternalWeakLinkage,///< ExternalWeak linkage description.
@@ -74,11 +76,10 @@ public:
removeDeadConstantUsers(); // remove any dead constants using this.
}
- unsigned getAlignment() const { return Alignment; }
- void setAlignment(unsigned Align) {
- assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
- Alignment = Align;
+ unsigned getAlignment() const {
+ return (1u << Alignment) >> 1;
}
+ void setAlignment(unsigned Align);
VisibilityTypes getVisibility() const { return VisibilityTypes(Visibility); }
bool hasDefaultVisibility() const { return Visibility == DefaultVisibility; }
@@ -138,9 +139,13 @@ public:
static bool isLinkerPrivateWeakLinkage(LinkageTypes Linkage) {
return Linkage == LinkerPrivateWeakLinkage;
}
+ static bool isLinkerPrivateWeakDefAutoLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkerPrivateWeakDefAutoLinkage;
+ }
static bool isLocalLinkage(LinkageTypes Linkage) {
return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage) ||
- isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage);
+ isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage) ||
+ isLinkerPrivateWeakDefAutoLinkage(Linkage);
}
static bool isDLLImportLinkage(LinkageTypes Linkage) {
return Linkage == DLLImportLinkage;
@@ -159,24 +164,26 @@ public:
/// by something non-equivalent at link time. For example, if a function has
/// weak linkage then the code defining it may be replaced by different code.
static bool mayBeOverridden(LinkageTypes Linkage) {
- return (Linkage == WeakAnyLinkage ||
- Linkage == LinkOnceAnyLinkage ||
- Linkage == CommonLinkage ||
- Linkage == ExternalWeakLinkage ||
- Linkage == LinkerPrivateWeakLinkage);
+ return Linkage == WeakAnyLinkage ||
+ Linkage == LinkOnceAnyLinkage ||
+ Linkage == CommonLinkage ||
+ Linkage == ExternalWeakLinkage ||
+ Linkage == LinkerPrivateWeakLinkage ||
+ Linkage == LinkerPrivateWeakDefAutoLinkage;
}
/// isWeakForLinker - Whether the definition of this global may be replaced at
/// link time.
static bool isWeakForLinker(LinkageTypes Linkage) {
- return (Linkage == AvailableExternallyLinkage ||
- Linkage == WeakAnyLinkage ||
- Linkage == WeakODRLinkage ||
- Linkage == LinkOnceAnyLinkage ||
- Linkage == LinkOnceODRLinkage ||
- Linkage == CommonLinkage ||
- Linkage == ExternalWeakLinkage ||
- Linkage == LinkerPrivateWeakLinkage);
+ return Linkage == AvailableExternallyLinkage ||
+ Linkage == WeakAnyLinkage ||
+ Linkage == WeakODRLinkage ||
+ Linkage == LinkOnceAnyLinkage ||
+ Linkage == LinkOnceODRLinkage ||
+ Linkage == CommonLinkage ||
+ Linkage == ExternalWeakLinkage ||
+ Linkage == LinkerPrivateWeakLinkage ||
+ Linkage == LinkerPrivateWeakDefAutoLinkage;
}
bool hasExternalLinkage() const { return isExternalLinkage(Linkage); }
@@ -196,6 +203,9 @@ public:
bool hasLinkerPrivateWeakLinkage() const {
return isLinkerPrivateWeakLinkage(Linkage);
}
+ bool hasLinkerPrivateWeakDefAutoLinkage() const {
+ return isLinkerPrivateWeakDefAutoLinkage(Linkage);
+ }
bool hasLocalLinkage() const { return isLocalLinkage(Linkage); }
bool hasDLLImportLinkage() const { return isDLLImportLinkage(Linkage); }
bool hasDLLExportLinkage() const { return isDLLExportLinkage(Linkage); }
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 0b772b0aae44..88f5ce1b2622 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -170,16 +170,6 @@ public:
void setMetadata(unsigned KindID, MDNode *Node);
void setMetadata(const char *Kind, MDNode *Node);
- /// setDbgMetadata - This is just an optimized helper function that is
- /// equivalent to setMetadata("dbg", Node);
- void setDbgMetadata(MDNode *Node);
-
- /// getDbgMetadata - This is just an optimized helper function that is
- /// equivalent to calling getMetadata("dbg").
- MDNode *getDbgMetadata() const {
- return DbgLoc.getAsMDNode(getContext());
- }
-
/// setDebugLoc - Set the debug location information for this instruction.
void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }
@@ -199,7 +189,7 @@ private:
void getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,MDNode*> > &)const;
void getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
MDNode*> > &) const;
- void removeAllMetadata();
+ void clearMetadataHashEntries();
public:
//===--------------------------------------------------------------------===//
// Predicates and helper methods.
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index af93a294cc88..bd1e889de076 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -941,33 +941,17 @@ public:
unsigned(isTC));
}
- /// @deprecated these "define hacks" will go away soon
- /// @brief coerce out-of-tree code to abandon the low-level interfaces
- /// @detail see below comments and update your code to high-level interfaces
- /// - getOperand(0) ---> getCalledValue(), or possibly getCalledFunction
- /// - setOperand(0, V) ---> setCalledFunction(V)
- ///
- /// in LLVM v2.8-only code
- /// - getOperand(N+1) ---> getArgOperand(N)
- /// - setOperand(N+1, V) ---> setArgOperand(N, V)
- /// - getNumOperands() ---> getNumArgOperands()+1 // note the "+1"!
- ///
- /// in backward compatible code please consult llvm/Support/CallSite.h,
- /// you should create a callsite using the CallInst pointer and call its
- /// methods
- ///
-# define public private
-# define protected private
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-# undef public
-# undef protected
-public:
- enum { ArgOffset = 0 }; ///< temporary, do not use for new code!
+ /// getNumArgOperands - Return the number of call arguments.
+ ///
unsigned getNumArgOperands() const { return getNumOperands() - 1; }
- Value *getArgOperand(unsigned i) const { return getOperand(i + ArgOffset); }
- void setArgOperand(unsigned i, Value *v) { setOperand(i + ArgOffset, v); }
+
+ /// getArgOperand/setArgOperand - Return/set the i-th call argument.
+ ///
+ Value *getArgOperand(unsigned i) const { return getOperand(i); }
+ void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
/// getCallingConv/setCallingConv - Get or set the calling convention of this
/// function call.
@@ -1056,17 +1040,22 @@ public:
/// indirect function invocation.
///
Function *getCalledFunction() const {
- return dyn_cast<Function>(Op<ArgOffset -1>());
+ return dyn_cast<Function>(Op<-1>());
}
/// getCalledValue - Get a pointer to the function that is invoked by this
/// instruction.
- const Value *getCalledValue() const { return Op<ArgOffset -1>(); }
- Value *getCalledValue() { return Op<ArgOffset -1>(); }
+ const Value *getCalledValue() const { return Op<-1>(); }
+ Value *getCalledValue() { return Op<-1>(); }
/// setCalledFunction - Set the function called.
void setCalledFunction(Value* Fn) {
- Op<ArgOffset -1>() = Fn;
+ Op<-1>() = Fn;
+ }
+
+ /// isInlineAsm - Check if this call is an inline asm statement.
+ bool isInlineAsm() const {
+ return isa<InlineAsm>(Op<-1>());
}
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -2461,7 +2450,12 @@ public:
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+ /// getNumArgOperands - Return the number of invoke arguments.
+ ///
unsigned getNumArgOperands() const { return getNumOperands() - 3; }
+
+ /// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
+ ///
Value *getArgOperand(unsigned i) const { return getOperand(i); }
void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
@@ -2735,7 +2729,7 @@ public:
TruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The (smaller) type to truncate to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2743,7 +2737,7 @@ public:
TruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The (smaller) type to truncate to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -2772,7 +2766,7 @@ public:
ZExtInst(
Value *S, ///< The value to be zero extended
const Type *Ty, ///< The type to zero extend to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2780,7 +2774,7 @@ public:
ZExtInst(
Value *S, ///< The value to be zero extended
const Type *Ty, ///< The type to zero extend to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -2809,7 +2803,7 @@ public:
SExtInst(
Value *S, ///< The value to be sign extended
const Type *Ty, ///< The type to sign extend to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2817,7 +2811,7 @@ public:
SExtInst(
Value *S, ///< The value to be sign extended
const Type *Ty, ///< The type to sign extend to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -2846,7 +2840,7 @@ public:
FPTruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The type to truncate to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2854,7 +2848,7 @@ public:
FPTruncInst(
Value *S, ///< The value to be truncated
const Type *Ty, ///< The type to truncate to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -2883,7 +2877,7 @@ public:
FPExtInst(
Value *S, ///< The value to be extended
const Type *Ty, ///< The type to extend to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2891,7 +2885,7 @@ public:
FPExtInst(
Value *S, ///< The value to be extended
const Type *Ty, ///< The type to extend to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -2920,7 +2914,7 @@ public:
UIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2928,7 +2922,7 @@ public:
UIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -2957,7 +2951,7 @@ public:
SIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -2965,7 +2959,7 @@ public:
SIToFPInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -2994,7 +2988,7 @@ public:
FPToUIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3002,7 +2996,7 @@ public:
FPToUIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< Where to insert the new instruction
);
@@ -3031,7 +3025,7 @@ public:
FPToSIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3039,7 +3033,7 @@ public:
FPToSIInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -3064,7 +3058,7 @@ public:
IntToPtrInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3072,7 +3066,7 @@ public:
IntToPtrInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -3104,7 +3098,7 @@ public:
PtrToIntInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3112,7 +3106,7 @@ public:
PtrToIntInst(
Value *S, ///< The value to be converted
const Type *Ty, ///< The type to convert to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
@@ -3141,7 +3135,7 @@ public:
BitCastInst(
Value *S, ///< The value to be casted
const Type *Ty, ///< The type to casted to
- const Twine &NameStr = "", ///< A name for the new instruction
+ const Twine &NameStr = "", ///< A name for the new instruction
Instruction *InsertBefore = 0 ///< Where to insert the new instruction
);
@@ -3149,7 +3143,7 @@ public:
BitCastInst(
Value *S, ///< The value to be casted
const Type *Ty, ///< The type to casted to
- const Twine &NameStr, ///< A name for the new instruction
+ const Twine &NameStr, ///< A name for the new instruction
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index 48f2da9b76c6..a17fa9cc5bdd 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -103,7 +103,7 @@ namespace llvm {
Value *getValue();
uint64_t getOffset() const {
return cast<ConstantInt>(
- const_cast<Value*>(getArgOperand(1)))->getZExtValue();
+ const_cast<Value*>(getArgOperand(1)))->getZExtValue();
}
MDNode *getVariable() const { return cast<MDNode>(getArgOperand(2)); }
@@ -269,6 +269,20 @@ namespace llvm {
}
};
+ /// EHExceptionInst - This represents the llvm.eh.exception instruction.
+ ///
+ class EHExceptionInst : public IntrinsicInst {
+ public:
+ // Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const EHExceptionInst *) { return true; }
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::eh_exception;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ };
+
/// EHSelectorInst - This represents the llvm.eh.selector instruction.
///
class EHSelectorInst : public IntrinsicInst {
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 444f514d59be..fb4f750f87b5 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -19,10 +19,11 @@ include "llvm/CodeGen/ValueTypes.td"
class IntrinsicProperty;
-// Intr*Mem - Memory properties. An intrinsic is allowed to have exactly one of
+// Intr*Mem - Memory properties. An intrinsic is allowed to have at most one of
// these properties set. They are listed from the most aggressive (best to use
// if correct) to the least aggressive. If no property is set, the worst case
-// is assumed (IntrWriteMem).
+// is assumed (it may read and write any memory it can get access to and it may
+// have other side effects).
// IntrNoMem - The intrinsic does not access memory or have any other side
// effects. It may be CSE'd deleted if dead, etc.
@@ -37,15 +38,11 @@ def IntrReadArgMem : IntrinsicProperty;
// deleted if dead.
def IntrReadMem : IntrinsicProperty;
-// IntrWriteArgMem - This intrinsic reads and writes only from memory that one
-// of its arguments points to, but may access an unspecified amount. The reads
-// and writes may be volatile, but except for this it has no other side effects.
-def IntrWriteArgMem : IntrinsicProperty;
-
-// IntrWriteMem - This intrinsic may read or modify unspecified memory or has
-// other side effects. It cannot be modified by the optimizer. This is the
-// default if the intrinsic has no other Intr*Mem property.
-def IntrWriteMem : IntrinsicProperty;
+// IntrReadWriteArgMem - This intrinsic reads and writes only from memory that
+// one of its arguments points to, but may access an unspecified amount. The
+// reads and writes may be volatile, but except for this it has no other side
+// effects.
+def IntrReadWriteArgMem : IntrinsicProperty;
// Commutative - This intrinsic is commutative: X op Y == Y op X.
def Commutative : IntrinsicProperty;
@@ -117,7 +114,7 @@ def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
def llvm_v16i8_ty : LLVMType<v16i8>; // 16 x i8
def llvm_v32i8_ty : LLVMType<v32i8>; // 32 x i8
-def llvm_v2i16_ty : LLVMType<v2i16>; // 4 x i16
+def llvm_v2i16_ty : LLVMType<v2i16>; // 2 x i16
def llvm_v4i16_ty : LLVMType<v4i16>; // 4 x i16
def llvm_v8i16_ty : LLVMType<v8i16>; // 8 x i16
def llvm_v16i16_ty : LLVMType<v16i16>; // 16 x i16
@@ -190,7 +187,7 @@ def int_gcread : Intrinsic<[llvm_ptr_ty],
[IntrReadArgMem]>;
def int_gcwrite : Intrinsic<[],
[llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty],
- [IntrWriteArgMem, NoCapture<1>, NoCapture<2>]>;
+ [IntrReadWriteArgMem, NoCapture<1>, NoCapture<2>]>;
//===--------------------- Code Generator Intrinsics ----------------------===//
//
@@ -204,21 +201,19 @@ def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
GCCBuiltin<"__builtin_stack_restore">;
-// IntrWriteArgMem is more pessimistic than strictly necessary for prefetch,
+// IntrReadWriteArgMem is more pessimistic than strictly necessary for prefetch,
// however it does conveniently prevent the prefetch from being reordered
// with respect to nearby accesses to the same memory.
def int_prefetch : Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrWriteArgMem, NoCapture<0>]>;
+ [IntrReadWriteArgMem, NoCapture<0>]>;
def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
// Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
// guard to the correct place on the stack frame.
-def int_stackprotector : Intrinsic<[],
- [llvm_ptr_ty, llvm_ptrptr_ty],
- [IntrWriteMem]>;
+def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
//===------------------- Standard C Library Intrinsics --------------------===//
//
@@ -226,15 +221,15 @@ def int_stackprotector : Intrinsic<[],
def int_memcpy : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i32_ty, llvm_i1_ty],
- [IntrWriteArgMem, NoCapture<0>, NoCapture<1>]>;
+ [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
def int_memmove : Intrinsic<[],
[llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
llvm_i32_ty, llvm_i1_ty],
- [IntrWriteArgMem, NoCapture<0>, NoCapture<1>]>;
+ [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
def int_memset : Intrinsic<[],
[llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
llvm_i32_ty, llvm_i1_ty],
- [IntrWriteArgMem, NoCapture<0>]>;
+ [IntrReadWriteArgMem, NoCapture<0>]>;
// These functions do not actually read memory, but they are sensitive to the
// rounding mode. This needs to be modelled separately; in the meantime
@@ -331,7 +326,7 @@ def int_annotation : Intrinsic<[llvm_anyint_ty],
//
def int_init_trampoline : Intrinsic<[llvm_ptr_ty],
[llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
- [IntrWriteArgMem]>,
+ [IntrReadWriteArgMem]>,
GCCBuiltin<"__builtin_init_trampoline">;
//===------------------------ Overflow Intrinsics -------------------------===//
@@ -369,79 +364,79 @@ def int_memory_barrier : Intrinsic<[],
def int_atomic_cmp_swap : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_val_compare_and_swap">;
def int_atomic_load_add : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_add">;
def int_atomic_swap : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_lock_test_and_set">;
def int_atomic_load_sub : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_sub">;
def int_atomic_load_and : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_and">;
def int_atomic_load_or : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_or">;
def int_atomic_load_xor : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_xor">;
def int_atomic_load_nand : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_nand">;
def int_atomic_load_min : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_min">;
def int_atomic_load_max : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_max">;
def int_atomic_load_umin : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_umin">;
def int_atomic_load_umax : Intrinsic<[llvm_anyint_ty],
[LLVMAnyPointerType<LLVMMatchType<0>>,
LLVMMatchType<0>],
- [IntrWriteArgMem, NoCapture<0>]>,
+ [IntrReadWriteArgMem, NoCapture<0>]>,
GCCBuiltin<"__sync_fetch_and_umax">;
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
[llvm_i64_ty, llvm_ptr_ty],
- [IntrWriteArgMem, NoCapture<1>]>;
+ [IntrReadWriteArgMem, NoCapture<1>]>;
def int_lifetime_end : Intrinsic<[],
[llvm_i64_ty, llvm_ptr_ty],
- [IntrWriteArgMem, NoCapture<1>]>;
+ [IntrReadWriteArgMem, NoCapture<1>]>;
def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
[llvm_i64_ty, llvm_ptr_ty],
[IntrReadArgMem, NoCapture<1>]>;
def int_invariant_end : Intrinsic<[],
[llvm_descriptor_ty, llvm_i64_ty,
llvm_ptr_ty],
- [IntrWriteArgMem, NoCapture<2>]>;
+ [IntrReadWriteArgMem, NoCapture<2>]>;
//===-------------------------- Other Intrinsics --------------------------===//
//
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index 40333caead7b..6c047718e6f6 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -21,6 +21,35 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
}
//===----------------------------------------------------------------------===//
+// Saturating Arithmentic
+
+let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
+ def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, Commutative]>;
+ def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// VFP
+
+let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
+ def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+ def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">,
+ Intrinsic<[], [llvm_i32_ty], []>;
+ def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+ [IntrNoMem]>;
+ def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
// Advanced SIMD (NEON)
let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
@@ -31,9 +60,6 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
class Neon_1Arg_Narrow_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMExtendedElementVectorType<0>], [IntrNoMem]>;
- class Neon_1Arg_Long_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
- [LLVMTruncatedElementVectorType<0>], [IntrNoMem]>;
class Neon_2Arg_Intrinsic
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
@@ -47,10 +73,6 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
[LLVMTruncatedElementVectorType<0>,
LLVMTruncatedElementVectorType<0>],
[IntrNoMem]>;
- class Neon_2Arg_Wide_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>, LLVMTruncatedElementVectorType<0>],
- [IntrNoMem]>;
class Neon_3Arg_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
@@ -102,25 +124,13 @@ let Properties = [IntrNoMem, Commutative] in {
def int_arm_neon_vqaddu : Neon_2Arg_Intrinsic;
def int_arm_neon_vaddhn : Neon_2Arg_Narrow_Intrinsic;
def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic;
- def int_arm_neon_vaddls : Neon_2Arg_Long_Intrinsic;
- def int_arm_neon_vaddlu : Neon_2Arg_Long_Intrinsic;
- def int_arm_neon_vaddws : Neon_2Arg_Wide_Intrinsic;
- def int_arm_neon_vaddwu : Neon_2Arg_Wide_Intrinsic;
// Vector Multiply.
def int_arm_neon_vmulp : Neon_2Arg_Intrinsic;
def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic;
def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic;
- def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic;
- def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic;
def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic;
def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic;
-
- // Vector Multiply and Accumulate/Subtract.
- def int_arm_neon_vmlals : Neon_3Arg_Long_Intrinsic;
- def int_arm_neon_vmlalu : Neon_3Arg_Long_Intrinsic;
- def int_arm_neon_vmlsls : Neon_3Arg_Long_Intrinsic;
- def int_arm_neon_vmlslu : Neon_3Arg_Long_Intrinsic;
def int_arm_neon_vqdmlal : Neon_3Arg_Long_Intrinsic;
def int_arm_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic;
@@ -146,10 +156,6 @@ def int_arm_neon_vqsubs : Neon_2Arg_Intrinsic;
def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic;
def int_arm_neon_vsubhn : Neon_2Arg_Narrow_Intrinsic;
def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic;
-def int_arm_neon_vsubls : Neon_2Arg_Long_Intrinsic;
-def int_arm_neon_vsublu : Neon_2Arg_Long_Intrinsic;
-def int_arm_neon_vsubws : Neon_2Arg_Wide_Intrinsic;
-def int_arm_neon_vsubwu : Neon_2Arg_Wide_Intrinsic;
// Vector Absolute Compare.
let TargetPrefix = "arm" in {
@@ -170,14 +176,6 @@ let TargetPrefix = "arm" in {
// Vector Absolute Differences.
def int_arm_neon_vabds : Neon_2Arg_Intrinsic;
def int_arm_neon_vabdu : Neon_2Arg_Intrinsic;
-def int_arm_neon_vabdls : Neon_2Arg_Long_Intrinsic;
-def int_arm_neon_vabdlu : Neon_2Arg_Long_Intrinsic;
-
-// Vector Absolute Difference and Accumulate.
-def int_arm_neon_vabas : Neon_3Arg_Intrinsic;
-def int_arm_neon_vabau : Neon_3Arg_Intrinsic;
-def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic;
-def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
// Vector Pairwise Add.
def int_arm_neon_vpadd : Neon_2Arg_Intrinsic;
@@ -288,13 +286,10 @@ def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic;
def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
-// Narrowing and Lengthening Vector Moves.
-def int_arm_neon_vmovn : Neon_1Arg_Narrow_Intrinsic;
+// Narrowing Saturating Vector Moves.
def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
-def int_arm_neon_vmovls : Neon_1Arg_Long_Intrinsic;
-def int_arm_neon_vmovlu : Neon_1Arg_Long_Intrinsic;
// Vector Table Lookup.
// The first 1-4 arguments are the table.
@@ -315,62 +310,76 @@ def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic;
let TargetPrefix = "arm" in {
// De-interleaving vector loads from N-element structures.
+ // Source operands are the address and alignment.
def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty],
- [llvm_ptr_ty], [IntrReadArgMem]>;
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrReadArgMem]>;
def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrReadArgMem]>;
def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrReadArgMem]>;
def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
- [llvm_ptr_ty], [IntrReadArgMem]>;
+ [llvm_ptr_ty, llvm_i32_ty],
+ [IntrReadArgMem]>;
// Vector load N-element structure to one lane.
+ // Source operands are: the address, the N input vectors (since only one
+ // lane is assigned), the lane number, and the alignment.
def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_ptr_ty, LLVMMatchType<0>,
- LLVMMatchType<0>, llvm_i32_ty],
- [IntrReadArgMem]>;
+ LLVMMatchType<0>, llvm_i32_ty,
+ llvm_i32_ty], [IntrReadArgMem]>;
def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>],
[llvm_ptr_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
- llvm_i32_ty], [IntrReadArgMem]>;
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrReadArgMem]>;
def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>],
[llvm_ptr_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>, llvm_i32_ty],
- [IntrReadArgMem]>;
+ LLVMMatchType<0>, llvm_i32_ty,
+ llvm_i32_ty], [IntrReadArgMem]>;
// Interleaving vector stores from N-element structures.
+ // Source operands are: the address, the N vectors, and the alignment.
def int_arm_neon_vst1 : Intrinsic<[],
- [llvm_ptr_ty, llvm_anyvector_ty],
- [IntrWriteArgMem]>;
+ [llvm_ptr_ty, llvm_anyvector_ty,
+ llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_arm_neon_vst2 : Intrinsic<[],
[llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>], [IntrWriteArgMem]>;
+ LLVMMatchType<0>, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
def int_arm_neon_vst3 : Intrinsic<[],
[llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrWriteArgMem]>;
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_arm_neon_vst4 : Intrinsic<[],
[llvm_ptr_ty, llvm_anyvector_ty,
LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>], [IntrWriteArgMem]>;
+ LLVMMatchType<0>, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
// Vector store N-element structure from one lane.
+ // Source operands are: the address, the N vectors, the lane number, and
+ // the alignment.
def int_arm_neon_vst2lane : Intrinsic<[],
[llvm_ptr_ty, llvm_anyvector_ty,
- LLVMMatchType<0>, llvm_i32_ty],
- [IntrWriteArgMem]>;
+ LLVMMatchType<0>, llvm_i32_ty,
+ llvm_i32_ty], [IntrReadWriteArgMem]>;
def int_arm_neon_vst3lane : Intrinsic<[],
[llvm_ptr_ty, llvm_anyvector_ty,
LLVMMatchType<0>, LLVMMatchType<0>,
- llvm_i32_ty], [IntrWriteArgMem]>;
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrReadWriteArgMem]>;
def int_arm_neon_vst4lane : Intrinsic<[],
[llvm_ptr_ty, llvm_anyvector_ty,
LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMMatchType<0>, llvm_i32_ty],
- [IntrWriteArgMem]>;
+ LLVMMatchType<0>, llvm_i32_ty,
+ llvm_i32_ty], [IntrReadWriteArgMem]>;
}
diff --git a/include/llvm/IntrinsicsPowerPC.td b/include/llvm/IntrinsicsPowerPC.td
index 4e959f337c00..da85bfba8631 100644
--- a/include/llvm/IntrinsicsPowerPC.td
+++ b/include/llvm/IntrinsicsPowerPC.td
@@ -18,17 +18,17 @@
// Non-altivec intrinsics.
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions.
- def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
- def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
- def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
- def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
- def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
- def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
- def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
- def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
+ def int_ppc_dcba : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbf : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbi : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbt : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbz : Intrinsic<[], [llvm_ptr_ty], []>;
+ def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
// sync instruction
- def int_ppc_sync : Intrinsic<[], [], [IntrWriteMem]>;
+ def int_ppc_sync : Intrinsic<[], [], []>;
}
@@ -86,31 +86,31 @@ class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Data Stream Control.
def int_ppc_altivec_dss : GCCBuiltin<"__builtin_altivec_dss">,
- Intrinsic<[], [llvm_i32_ty], [IntrWriteMem]>;
+ Intrinsic<[], [llvm_i32_ty], []>;
def int_ppc_altivec_dssall : GCCBuiltin<"__builtin_altivec_dssall">,
- Intrinsic<[], [], [IntrWriteMem]>;
+ Intrinsic<[], [], []>;
def int_ppc_altivec_dst : GCCBuiltin<"__builtin_altivec_dst">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ []>;
def int_ppc_altivec_dstt : GCCBuiltin<"__builtin_altivec_dstt">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ []>;
def int_ppc_altivec_dstst : GCCBuiltin<"__builtin_altivec_dstst">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ []>;
def int_ppc_altivec_dststt : GCCBuiltin<"__builtin_altivec_dststt">,
Intrinsic<[],
[llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ []>;
// VSCR access.
def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">,
Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>;
def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">,
- Intrinsic<[], [llvm_v4i32_ty], [IntrWriteMem]>;
+ Intrinsic<[], [llvm_v4i32_ty], []>;
// Loads. These don't map directly to GCC builtins because they represent the
@@ -129,20 +129,15 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Stores. These don't map directly to GCC builtins because they represent the
// source address with a single pointer.
def int_ppc_altivec_stvx :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
- [IntrWriteMem]>;
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
def int_ppc_altivec_stvxl :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
- [IntrWriteMem]>;
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
def int_ppc_altivec_stvebx :
- Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty],
- [IntrWriteMem]>;
+ Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty], []>;
def int_ppc_altivec_stvehx :
- Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty],
- [IntrWriteMem]>;
+ Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty], []>;
def int_ppc_altivec_stvewx :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
- [IntrWriteMem]>;
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>;
// Comparisons setting a vector.
def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">,
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index cea485681089..06ea3ae3b518 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -11,6 +11,11 @@
//
//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// Interrupt traps
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_int : Intrinsic<[], [llvm_i8_ty]>;
+}
//===----------------------------------------------------------------------===//
// SSE1
@@ -143,24 +148,24 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v4f32_ty], [IntrWriteMem]>;
+ llvm_v4f32_ty], []>;
}
// Cacheability support ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_movnt_ps : GCCBuiltin<"__builtin_ia32_movntps">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v4f32_ty], [IntrWriteMem]>;
+ llvm_v4f32_ty], []>;
def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">,
- Intrinsic<[], [], [IntrWriteMem]>;
+ Intrinsic<[], [], []>;
}
// Control register.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse_stmxcsr :
- Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
+ Intrinsic<[], [llvm_ptr_ty], []>;
def int_x86_sse_ldmxcsr :
- Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
+ Intrinsic<[], [llvm_ptr_ty], []>;
}
// Misc.
@@ -459,26 +464,26 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v2f64_ty], [IntrWriteMem]>;
+ llvm_v2f64_ty], []>;
def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v16i8_ty], [IntrWriteMem]>;
+ llvm_v16i8_ty], []>;
def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v4i32_ty], [IntrWriteMem]>;
+ llvm_v4i32_ty], []>;
}
// Cacheability support ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_movnt_dq : GCCBuiltin<"__builtin_ia32_movntdq">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v2i64_ty], [IntrWriteMem]>;
+ llvm_v2i64_ty], []>;
def int_x86_sse2_movnt_pd : GCCBuiltin<"__builtin_ia32_movntpd">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_v2f64_ty], [IntrWriteMem]>;
+ llvm_v2f64_ty], []>;
def int_x86_sse2_movnt_i : GCCBuiltin<"__builtin_ia32_movnti">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_i32_ty], [IntrWriteMem]>;
+ llvm_i32_ty], []>;
}
// Misc.
@@ -498,13 +503,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">,
Intrinsic<[], [llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_ptr_ty], [IntrWriteMem]>;
+ llvm_v16i8_ty, llvm_ptr_ty], []>;
def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">,
- Intrinsic<[], [llvm_ptr_ty], [IntrWriteMem]>;
+ Intrinsic<[], [llvm_ptr_ty], []>;
def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">,
- Intrinsic<[], [], [IntrWriteMem]>;
+ Intrinsic<[], [], []>;
def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">,
- Intrinsic<[], [], [IntrWriteMem]>;
+ Intrinsic<[], [], []>;
}
//===----------------------------------------------------------------------===//
@@ -546,10 +551,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">,
Intrinsic<[], [llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrWriteMem]>;
+ llvm_i32_ty, llvm_i32_ty], []>;
def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">,
Intrinsic<[], [llvm_i32_ty,
- llvm_i32_ty], [IntrWriteMem]>;
+ llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
@@ -625,6 +630,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_ssse3_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">,
+ Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
}
// Sign ops
@@ -978,19 +986,360 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
+// AVX
+
+// Arithmetic ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Horizontal ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector permutation
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vpermilvar_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx_vpermilvar_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vperm2f128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vperm2f128_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vperm2f128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vperm2f128_si_256 :
+ GCCBuiltin<"__builtin_ia32_vperm2f128_si256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vpermil_pd : GCCBuiltin<"__builtin_ia32_vpermilpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vpermil_ps : GCCBuiltin<"__builtin_ia32_vpermilps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vpermil_pd_256 : GCCBuiltin<"__builtin_ia32_vpermilpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vpermil_ps_256 : GCCBuiltin<"__builtin_ia32_vpermilps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector blend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector dot product
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Vector compare
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_cmp_pd_256 : GCCBuiltin<"__builtin_ia32_cmppd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_cmp_ps_256 : GCCBuiltin<"__builtin_ia32_cmpps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector extract and insert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vextractf128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vextractf128_pd256">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vextractf128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vextractf128_ps256">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vextractf128_si_256 :
+ GCCBuiltin<"__builtin_ia32_vextractf128_si256">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx_vinsertf128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vinsertf128_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vinsertf128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vinsertf128_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx_vinsertf128_si_256 :
+ GCCBuiltin<"__builtin_ia32_vinsertf128_si256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector convert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector bit test
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">,
+ Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty,
+ llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty,
+ llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty,
+ llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty,
+ llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty], [IntrNoMem]>;
+ def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty], [IntrNoMem]>;
+}
+
+// Vector extract sign mask
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+}
+
+// Vector zero
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">,
+ Intrinsic<[], [], []>;
+ def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">,
+ Intrinsic<[], [], []>;
+}
+
+// Vector load with broadcast
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_vbroadcastss :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx_vbroadcast_sd_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx_vbroadcastss_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx_vbroadcastf128_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx_vbroadcastf128_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+}
+
+// SIMD load ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_loadu_pd_256 : GCCBuiltin<"__builtin_ia32_loadupd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx_loadu_ps_256 : GCCBuiltin<"__builtin_ia32_loadups256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx_loadu_dq_256 : GCCBuiltin<"__builtin_ia32_loaddqu256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
+}
+
+// SIMD store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_storeu_pd_256 : GCCBuiltin<"__builtin_ia32_storeupd256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], []>;
+ def int_x86_avx_storeu_ps_256 : GCCBuiltin<"__builtin_ia32_storeups256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], []>;
+ def int_x86_avx_storeu_dq_256 : GCCBuiltin<"__builtin_ia32_storedqu256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], []>;
+}
+
+// Cacheability support ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_movnt_dq_256 : GCCBuiltin<"__builtin_ia32_movntdq256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty], []>;
+ def int_x86_avx_movnt_pd_256 : GCCBuiltin<"__builtin_ia32_movntpd256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], []>;
+ def int_x86_avx_movnt_ps_256 : GCCBuiltin<"__builtin_ia32_movntps256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], []>;
+}
+
+// Conditional load ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty], [IntrReadMem]>;
+ def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty], [IntrReadMem]>;
+ def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty], [IntrReadMem]>;
+ def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadMem]>;
+}
+
+// Conditional store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v2f64_ty, llvm_v2f64_ty], []>;
+ def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v4f32_ty, llvm_v4f32_ty], []>;
+ def int_x86_avx_maskstore_pd_256 :
+ GCCBuiltin<"__builtin_ia32_maskstorepd256">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v4f64_ty, llvm_v4f64_ty], []>;
+ def int_x86_avx_maskstore_ps_256 :
+ GCCBuiltin<"__builtin_ia32_maskstoreps256">,
+ Intrinsic<[], [llvm_ptr_ty,
+ llvm_v8f32_ty, llvm_v8f32_ty], []>;
+}
+
+//===----------------------------------------------------------------------===//
// MMX
// Empty MMX state op.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">,
- Intrinsic<[], [], [IntrWriteMem]>;
+ Intrinsic<[], [], []>;
def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">,
- Intrinsic<[], [], [IntrWriteMem]>;
+ Intrinsic<[], [], []>;
}
// Integer arithmetic ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Addition
+ def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
+ Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
+ Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
+ Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+ [IntrNoMem]>;
+
def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
llvm_v8i8_ty], [IntrNoMem, Commutative]>;
@@ -1006,6 +1355,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v4i16_ty], [IntrNoMem, Commutative]>;
// Subtraction
+ def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">,
+ Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">,
+ Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">,
+ Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">,
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+ [IntrNoMem]>;
+
def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">,
Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
llvm_v8i8_ty], [IntrNoMem]>;
@@ -1024,6 +1386,9 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">,
Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">,
+ Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
+ llvm_v4i16_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">,
Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty,
llvm_v4i16_ty], [IntrNoMem, Commutative]>;
@@ -1034,6 +1399,20 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2i32_ty], [llvm_v4i16_ty,
llvm_v4i16_ty], [IntrNoMem, Commutative]>;
+ // Bitwise operations
+ def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty],
+ [IntrNoMem]>;
+
// Averages
def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty,
@@ -1135,6 +1514,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v4i16_ty], [IntrNoMem]>;
}
+// Unpacking ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">,
+ Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">,
+ Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">,
+ Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">,
+ Intrinsic<[llvm_v8i8_ty], [llvm_v8i8_ty, llvm_v8i8_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">,
+ Intrinsic<[llvm_v4i16_ty], [llvm_v4i16_ty, llvm_v4i16_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">,
+ Intrinsic<[llvm_v2i32_ty], [llvm_v2i32_ty, llvm_v2i32_ty],
+ [IntrNoMem]>;
+}
+
// Integer comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
@@ -1161,14 +1562,47 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Misc.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">,
- Intrinsic<[],
- [llvm_v8i8_ty, llvm_v8i8_ty, llvm_ptr_ty],
- [IntrWriteMem]>;
+ Intrinsic<[], [llvm_v8i8_ty, llvm_v8i8_ty, llvm_ptr_ty], []>;
def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">,
Intrinsic<[llvm_i32_ty], [llvm_v8i8_ty], [IntrNoMem]>;
def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">,
- Intrinsic<[], [llvm_ptr_ty,
- llvm_v1i64_ty], [IntrWriteMem]>;
+ Intrinsic<[], [llvm_ptr_ty, llvm_v1i64_ty], []>;
+
+// def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">,
+// Intrinsic<[llvm_v1i64_ty], [llvm_1i64_ty,
+// llvm_v1i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_pextr_w :
+ Intrinsic<[llvm_i32_ty], [llvm_v1i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_mmx_pinsr_w :
+ Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty,
+ llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_cvtsi32_si64 :
+ Intrinsic<[llvm_v1i64_ty], [llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_mmx_cvtsi64_si32 :
+ Intrinsic<[llvm_i32_ty], [llvm_v1i64_ty], [IntrNoMem]>;
+
+ def int_x86_mmx_vec_init_b : GCCBuiltin<"__builtin_ia32_vec_init_v8qi">,
+ Intrinsic<[llvm_v8i8_ty],
+ [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty, llvm_i8_ty,
+ llvm_i8_ty, llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_vec_init_w : GCCBuiltin<"__builtin_ia32_vec_init_v4hi">,
+ Intrinsic<[llvm_v4i16_ty],
+ [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_mmx_vec_init_d : GCCBuiltin<"__builtin_ia32_vec_init_v2si">,
+ Intrinsic<[llvm_v2i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_mmx_vec_ext_d : GCCBuiltin<"__builtin_ia32_vec_ext_v2si">,
+ Intrinsic<[llvm_v2i32_ty],
+ [llvm_v2i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
}
diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h
index afae08b07daf..7cb6579aef66 100644
--- a/include/llvm/LLVMContext.h
+++ b/include/llvm/LLVMContext.h
@@ -40,7 +40,7 @@ public:
// Pinned metadata names, which always have the same value. This is a
// compile-time performance optimization, not a correctness optimization.
enum {
- MD_dbg = 1 // "dbg" -> 1.
+ MD_dbg = 0 // "dbg"
};
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
@@ -48,8 +48,7 @@ public:
unsigned getMDKindID(StringRef Name) const;
/// getMDKindNames - Populate client supplied SmallVector with the name for
- /// custom metadata IDs registered in this LLVMContext. ID #0 is not used,
- /// so it is filled in as an empty string.
+ /// custom metadata IDs registered in this LLVMContext.
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
/// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 876703b90320..35dab62143df 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -22,6 +22,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/PointerTracking.h"
#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/RegionPrinter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Assembly/PrintModulePass.h"
@@ -52,6 +53,7 @@ namespace {
(void) llvm::createBasicAliasAnalysisPass();
(void) llvm::createLibCallAliasAnalysisPass(0);
(void) llvm::createScalarEvolutionAliasAnalysisPass();
+ (void) llvm::createTypeBasedAliasAnalysisPass();
(void) llvm::createBlockPlacementPass();
(void) llvm::createBreakCriticalEdgesPass();
(void) llvm::createCFGSimplificationPass();
@@ -106,6 +108,11 @@ namespace {
(void) llvm::createPostDomOnlyViewerPass();
(void) llvm::createPostDomViewerPass();
(void) llvm::createReassociatePass();
+ (void) llvm::createRegionInfoPass();
+ (void) llvm::createRegionOnlyPrinterPass();
+ (void) llvm::createRegionOnlyViewerPass();
+ (void) llvm::createRegionPrinterPass();
+ (void) llvm::createRegionViewerPass();
(void) llvm::createSCCPPass();
(void) llvm::createScalarReplAggregatesPass();
(void) llvm::createSimplifyLibCallsPass();
@@ -135,12 +142,11 @@ namespace {
(void) llvm::createDbgInfoPrinterPass();
(void) llvm::createModuleDebugInfoPrinterPass();
(void) llvm::createPartialInliningPass();
- (void) llvm::createSSIPass();
- (void) llvm::createSSIEverythingPass();
(void) llvm::createGEPSplitterPass();
- (void) llvm::createABCDPass();
(void) llvm::createLintPass();
(void) llvm::createSinkingPass();
+ (void) llvm::createLowerAtomicPass();
+ (void) llvm::createCorrelatedValuePropagationPass();
(void)new llvm::IntervalPartition();
(void)new llvm::FindUsedTypes();
diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllVMCore.h
index 6cf2c4b8aada..6959cb6d1efc 100644
--- a/include/llvm/LinkAllVMCore.h
+++ b/include/llvm/LinkAllVMCore.h
@@ -33,7 +33,6 @@
#include "llvm/System/TimeValue.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/SlowOperationInformer.h"
#include <cstdlib>
namespace {
diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h
index cc7bf88115c5..b402a6090e2c 100644
--- a/include/llvm/Linker.h
+++ b/include/llvm/Linker.h
@@ -158,7 +158,6 @@ class Linker {
/// @returns true if an error occurred, false otherwise
/// @see LinkItemKind
/// @see getLastError
- /// @throws nothing
bool LinkInItems (
const ItemList& Items, ///< Set of libraries/files to link in
ItemList& NativeItems ///< Output list of native files/libs
diff --git a/include/llvm/MC/ELFObjectWriter.h b/include/llvm/MC/ELFObjectWriter.h
new file mode 100644
index 000000000000..3b9951f4e7ab
--- /dev/null
+++ b/include/llvm/MC/ELFObjectWriter.h
@@ -0,0 +1,46 @@
+//===-- llvm/MC/ELFObjectWriter.h - ELF File Writer ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_ELFOBJECTWRITER_H
+#define LLVM_MC_ELFOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+namespace llvm {
+class MCAsmFixup;
+class MCAssembler;
+class MCFragment;
+class MCValue;
+class raw_ostream;
+
+class ELFObjectWriter : public MCObjectWriter {
+ void *Impl;
+
+public:
+ ELFObjectWriter(raw_ostream &OS, bool Is64Bit, bool IsLittleEndian = true,
+ bool HasRelocationAddend = true);
+
+ virtual ~ELFObjectWriter();
+
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm);
+
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+ virtual void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 8516de0188d6..43952e0845da 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -255,6 +255,14 @@ namespace llvm {
/// DwarfSectionOffsetDirective - Special section offset directive.
const char* DwarfSectionOffsetDirective; // Defaults to NULL
+ /// DwarfUsesAbsoluteLabelForStmtList - True if DW_AT_stmt_list needs
+ /// absolute label instead of offset.
+ bool DwarfUsesAbsoluteLabelForStmtList; // Defaults to true;
+
+ // DwarfUsesLabelOffsetDifference - True if Dwarf2 output can
+ // use EmitLabelOffsetDifference.
+ bool DwarfUsesLabelOffsetForRanges;
+
//===--- CBE Asm Translation Table -----------------------------------===//
const char *const *AsmTransCBE; // Defaults to empty
@@ -417,6 +425,12 @@ namespace llvm {
const char *getDwarfSectionOffsetDirective() const {
return DwarfSectionOffsetDirective;
}
+ bool doesDwarfUsesAbsoluteLabelForStmtList() const {
+ return DwarfUsesAbsoluteLabelForStmtList;
+ }
+ bool doesDwarfUsesLabelOffsetForRanges() const {
+ return DwarfUsesLabelOffsetForRanges;
+ }
const char *const *getAsmCBE() const {
return AsmTransCBE;
}
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 07ca070ab288..d193b986a934 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -24,6 +24,7 @@ namespace llvm {
class raw_ostream;
class MCAsmLayout;
class MCAssembler;
+class MCBinaryExpr;
class MCContext;
class MCCodeEmitter;
class MCExpr;
@@ -87,6 +88,7 @@ protected:
public:
// Only for sentinel.
MCFragment();
+ virtual ~MCFragment();
FragmentType getKind() const { return Kind; }
@@ -162,7 +164,7 @@ class MCInstFragment : public MCFragment {
/// Inst - The instruction this is a fragment for.
MCInst Inst;
- /// InstSize - The size of the currently encoded instruction.
+ /// Code - Binary data for the currently encoded instruction.
SmallString<8> Code;
/// Fixups - The list of fixups in this fragment.
@@ -452,6 +454,10 @@ public:
// common symbol can never get a definition.
uint64_t CommonSize;
+ /// SymbolSize - An expression describing how to calculate the size of
+ /// a symbol. If a symbol has no size this field will be NULL.
+ const MCExpr *SymbolSize;
+
/// CommonAlign - The alignment of the symbol, if it is 'common'.
//
// FIXME: Pack this in with other fields?
@@ -509,6 +515,15 @@ public:
return CommonSize;
}
+ void setSize(const MCExpr *SS) {
+ SymbolSize = SS;
+ }
+
+ const MCExpr *getSize() const {
+ return SymbolSize;
+ }
+
+
/// getCommonAlignment - Return the alignment of a 'common' symbol.
unsigned getCommonAlignment() const {
assert(isCommon() && "Not a 'common' symbol!");
@@ -649,6 +664,8 @@ public:
void WriteSectionData(const MCSectionData *Section, const MCAsmLayout &Layout,
MCObjectWriter *OW) const;
+ void AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout);
+
public:
/// Construct a new assembler instance.
///
@@ -669,7 +686,9 @@ public:
MCCodeEmitter &getEmitter() const { return Emitter; }
/// Finish - Do final processing and write the object to the output stream.
- void Finish();
+ /// \arg Writer is used for custom object writer (as the MCJIT does),
+ /// if not specified it is automatically created from backend.
+ void Finish(MCObjectWriter *Writer = 0);
// FIXME: This does not belong here.
bool getSubsectionsViaSymbols() const {
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index a57b5bf745d3..d22868cdbd0c 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -11,10 +11,12 @@
#define LLVM_MC_MCCONTEXT_H
#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/raw_ostream.h"
+#include <vector> // FIXME: Shouldn't be needed.
namespace llvm {
class MCAsmInfo;
@@ -22,6 +24,9 @@ namespace llvm {
class MCSection;
class MCSymbol;
class MCLabel;
+ class MCDwarfFile;
+ class MCDwarfLoc;
+ class MCLineSection;
class StringRef;
class Twine;
class MCSectionMachO;
@@ -35,9 +40,6 @@ namespace llvm {
/// The MCAsmInfo for this target.
const MCAsmInfo &MAI;
-
- /// Sections - Bindings of names to allocated sections.
- StringMap<MCSection*> Sections;
/// Symbols - Bindings of names to symbols.
StringMap<MCSymbol*> Symbols;
@@ -66,6 +68,18 @@ namespace llvm {
/// .secure_log_reset appearing between them.
bool SecureLogUsed;
+ /// The dwarf file and directory tables from the dwarf .file directive.
+ std::vector<MCDwarfFile *> MCDwarfFiles;
+ std::vector<StringRef> MCDwarfDirs;
+
+ /// The current dwarf line information from the last dwarf .loc directive.
+ MCDwarfLoc CurrentDwarfLoc;
+ bool DwarfLocSeen;
+
+ /// The dwarf line information from the .loc directives for the sections
+ /// with assembled machine instructions have after seeing .loc directives.
+ DenseMap<const MCSection *, MCLineSection *> MCLineSections;
+
/// Allocator - Allocator object used for creating machine code objects.
///
/// We use a bump pointer allocator to avoid the need to track all allocated
@@ -126,7 +140,8 @@ namespace llvm {
const MCSection *getELFSection(StringRef Section, unsigned Type,
unsigned Flags, SectionKind Kind,
- bool IsExplicit = false);
+ bool IsExplicit = false,
+ unsigned EntrySize = 0);
const MCSection *getCOFFSection(StringRef Section, unsigned Characteristics,
int Selection, SectionKind Kind);
@@ -139,6 +154,43 @@ namespace llvm {
/// @}
+ /// @name Dwarf Managment
+ /// @{
+
+ /// GetDwarfFile - creates an entry in the dwarf file and directory tables.
+ unsigned GetDwarfFile(StringRef FileName, unsigned FileNumber);
+
+ bool ValidateDwarfFileNumber(unsigned FileNumber);
+
+ const std::vector<MCDwarfFile *> &getMCDwarfFiles() {
+ return MCDwarfFiles;
+ }
+ const std::vector<StringRef> &getMCDwarfDirs() {
+ return MCDwarfDirs;
+ }
+ DenseMap<const MCSection *, MCLineSection *> &getMCLineSections() {
+ return MCLineSections;
+ }
+
+ /// setCurrentDwarfLoc - saves the information from the currently parsed
+ /// dwarf .loc directive and sets DwarfLocSeen. When the next instruction /// is assembled an entry in the line number table with this information and
+ /// the address of the instruction will be created.
+ void setCurrentDwarfLoc(unsigned FileNum, unsigned Line, unsigned Column,
+ unsigned Flags, unsigned Isa) {
+ CurrentDwarfLoc.setFileNum(FileNum);
+ CurrentDwarfLoc.setLine(Line);
+ CurrentDwarfLoc.setColumn(Column);
+ CurrentDwarfLoc.setFlags(Flags);
+ CurrentDwarfLoc.setIsa(Isa);
+ DwarfLocSeen = true;
+ }
+ void clearDwarfLocSeen() { DwarfLocSeen = false; }
+
+ bool getDwarfLocSeen() { return DwarfLocSeen; }
+ const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; }
+
+ /// @}
+
char *getSecureLogFile() { return SecureLogFile; }
raw_ostream *getSecureLog() { return SecureLog; }
bool getSecureLogUsed() { return SecureLogUsed; }
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
new file mode 100644
index 000000000000..dac875cf1b67
--- /dev/null
+++ b/include/llvm/MC/MCDwarf.h
@@ -0,0 +1,156 @@
+//===- MCDwarf.h - Machine Code Dwarf support -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCDwarfFile to support the dwarf
+// .file directive.
+// TODO: add the support needed for the .loc directive.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCDWARF_H
+#define LLVM_MC_MCDWARF_H
+
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
+namespace llvm {
+ class MCContext;
+ class MCSection;
+ class MCSymbol;
+ class raw_ostream;
+
+ /// MCDwarfFile - Instances of this class represent the name of the dwarf
+ /// .file directive and its associated dwarf file number in the MC file,
+ /// and MCDwarfFile's are created and unique'd by the MCContext class where
+ /// the file number for each is its index into the vector of DwarfFiles (note
+ /// index 0 is not used and not a valid dwarf file number).
+ class MCDwarfFile {
+ // Name - the base name of the file without its directory path.
+ // The StringRef references memory allocated in the MCContext.
+ StringRef Name;
+
+ // DirIndex - the index into the list of directory names for this file name.
+ unsigned DirIndex;
+
+ private: // MCContext creates and uniques these.
+ friend class MCContext;
+ MCDwarfFile(StringRef name, unsigned dirIndex)
+ : Name(name), DirIndex(dirIndex) {}
+
+ MCDwarfFile(const MCDwarfFile&); // DO NOT IMPLEMENT
+ void operator=(const MCDwarfFile&); // DO NOT IMPLEMENT
+ public:
+ /// getName - Get the base name of this MCDwarfFile.
+ StringRef getName() const { return Name; }
+
+ /// getDirIndex - Get the dirIndex of this MCDwarfFile.
+ unsigned getDirIndex() const { return DirIndex; }
+
+
+ /// print - Print the value to the stream \arg OS.
+ void print(raw_ostream &OS) const;
+
+ /// dump - Print the value to stderr.
+ void dump() const;
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const MCDwarfFile &DwarfFile){
+ DwarfFile.print(OS);
+ return OS;
+ }
+
+ /// MCDwarfLoc - Instances of this class represent the information from a
+ /// dwarf .loc directive.
+ class MCDwarfLoc {
+ // FileNum - the file number.
+ unsigned FileNum;
+ // Line - the line number.
+ unsigned Line;
+ // Column - the column position.
+ unsigned Column;
+ // Flags (see #define's below)
+ unsigned Flags;
+ // Isa
+ unsigned Isa;
+
+#define DWARF2_FLAG_IS_STMT (1 << 0)
+#define DWARF2_FLAG_BASIC_BLOCK (1 << 1)
+#define DWARF2_FLAG_PROLOGUE_END (1 << 2)
+#define DWARF2_FLAG_EPILOGUE_BEGIN (1 << 3)
+
+ private: // MCContext manages these
+ friend class MCContext;
+ friend class MCLineEntry;
+ MCDwarfLoc(unsigned fileNum, unsigned line, unsigned column, unsigned flags,
+ unsigned isa)
+ : FileNum(fileNum), Line(line), Column(column), Flags(flags), Isa(isa) {}
+
+ // Allow the default copy constructor and assignment operator to be used
+ // for an MCDwarfLoc object.
+
+ public:
+ /// setFileNum - Set the FileNum of this MCDwarfLoc.
+ void setFileNum(unsigned fileNum) { FileNum = fileNum; }
+
+ /// setLine - Set the Line of this MCDwarfLoc.
+ void setLine(unsigned line) { Line = line; }
+
+ /// setColumn - Set the Column of this MCDwarfLoc.
+ void setColumn(unsigned column) { Column = column; }
+
+ /// setFlags - Set the Flags of this MCDwarfLoc.
+ void setFlags(unsigned flags) { Flags = flags; }
+
+ /// setIsa - Set the Isa of this MCDwarfLoc.
+ void setIsa(unsigned isa) { Isa = isa; }
+ };
+
+ /// MCLineEntry - Instances of this class represent the line information for
+ /// the dwarf line table entries. Which is created after a machine
+ /// instruction is assembled and uses an address from a temporary label
+ /// created at the current address in the current section and the info from
+ /// the last .loc directive seen as stored in the context.
+ class MCLineEntry : public MCDwarfLoc {
+ MCSymbol *Label;
+
+ private:
+ // Allow the default copy constructor and assignment operator to be used
+ // for an MCLineEntry object.
+
+ public:
+ // Constructor to create an MCLineEntry given a symbol and the dwarf loc.
+ MCLineEntry(MCSymbol *label, const MCDwarfLoc loc) : MCDwarfLoc(loc),
+ Label(label) {}
+ };
+
+ /// MCLineSection - Instances of this class represent the line information
+ /// for a section where machine instructions have been assembled after seeing
+ /// .loc directives. This is the information used to build the dwarf line
+ /// table for a section.
+ class MCLineSection {
+ std::vector<MCLineEntry> MCLineEntries;
+
+ private:
+ MCLineSection(const MCLineSection&); // DO NOT IMPLEMENT
+ void operator=(const MCLineSection&); // DO NOT IMPLEMENT
+
+ public:
+ // Constructor to create an MCLineSection with an empty MCLineEntries
+ // vector.
+ MCLineSection() {}
+
+ // addLineEntry - adds an entry to this MCLineSection's line entries
+ void addLineEntry(const MCLineEntry &LineEntry) {
+ MCLineEntries.push_back(LineEntry);
+ }
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCELFSymbolFlags.h b/include/llvm/MC/MCELFSymbolFlags.h
new file mode 100644
index 000000000000..eb7978b18c5c
--- /dev/null
+++ b/include/llvm/MC/MCELFSymbolFlags.h
@@ -0,0 +1,54 @@
+//===- MCELFSymbolFlags.h - ELF Symbol Flags ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolFlags used for the ELF target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFSYMBOLFLAGS_H
+#define LLVM_MC_MCELFSYMBOLFLAGS_H
+
+#include "llvm/Support/ELF.h"
+
+// Because all the symbol flags need to be stored in the MCSymbolData
+// 'flags' variable we need to provide shift constants per flag type.
+
+namespace llvm {
+ enum {
+ ELF_STT_Shift = 0, // Shift value for STT_* flags.
+ ELF_STB_Shift = 4, // Shift value for STB_* flags.
+ ELF_STV_Shift = 8 // Shift value ofr STV_* flags.
+ };
+
+ enum SymbolFlags {
+ ELF_STB_Local = (ELF::STB_LOCAL << ELF_STB_Shift),
+ ELF_STB_Global = (ELF::STB_GLOBAL << ELF_STB_Shift),
+ ELF_STB_Weak = (ELF::STB_WEAK << ELF_STB_Shift),
+ ELF_STB_Loproc = (ELF::STB_LOPROC << ELF_STB_Shift),
+ ELF_STB_Hiproc = (ELF::STB_HIPROC << ELF_STB_Shift),
+
+ ELF_STT_Notype = (ELF::STT_NOTYPE << ELF_STT_Shift),
+ ELF_STT_Object = (ELF::STT_OBJECT << ELF_STT_Shift),
+ ELF_STT_Func = (ELF::STT_FUNC << ELF_STT_Shift),
+ ELF_STT_Section = (ELF::STT_SECTION << ELF_STT_Shift),
+ ELF_STT_File = (ELF::STT_FILE << ELF_STT_Shift),
+ ELF_STT_Common = (ELF::STT_COMMON << ELF_STT_Shift),
+ ELF_STT_Tls = (ELF::STT_TLS << ELF_STT_Shift),
+ ELF_STT_Loproc = (ELF::STT_LOPROC << ELF_STT_Shift),
+ ELF_STT_Hiproc = (ELF::STT_HIPROC << ELF_STT_Shift),
+
+ ELF_STV_Default = (ELF::STV_DEFAULT << ELF_STV_Shift),
+ ELF_STV_Internal = (ELF::STV_INTERNAL << ELF_STV_Shift),
+ ELF_STV_Hidden = (ELF::STV_HIDDEN << ELF_STV_Shift),
+ ELF_STV_Protected = (ELF::STV_PROTECTED << ELF_STV_Shift)
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index 7b9ff00fc078..ea6d9c12338d 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -16,6 +16,9 @@ namespace llvm {
class MCAssembler;
class MCCodeEmitter;
class MCSectionData;
+class MCExpr;
+class MCFragment;
+class MCDataFragment;
class TargetAsmBackend;
class raw_ostream;
@@ -39,6 +42,14 @@ protected:
return CurSectionData;
}
+ MCFragment *getCurrentFragment() const;
+
+ /// Get a data fragment to write into, creating a new one if the current
+ /// fragment is not a data fragment.
+ MCDataFragment *getOrCreateDataFragment() const;
+
+ const MCExpr *AddValueSymbols(const MCExpr *Value);
+
public:
MCAssembler &getAssembler() { return *Assembler; }
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 22eea7e022e3..f1c1cb8a5991 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -162,7 +162,7 @@ public:
/// @}
};
-MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS);
+MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
} // End llvm namespace
diff --git a/include/llvm/MC/MCParser/AsmParser.h b/include/llvm/MC/MCParser/AsmParser.h
deleted file mode 100644
index 0e8570aa908b..000000000000
--- a/include/llvm/MC/MCParser/AsmParser.h
+++ /dev/null
@@ -1,152 +0,0 @@
-//===- AsmParser.h - Parser for Assembly Files ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class declares the parser for assembly files.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ASMPARSER_H
-#define ASMPARSER_H
-
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/AsmCond.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/ADT/StringMap.h"
-#include <vector>
-
-namespace llvm {
-class AsmCond;
-class AsmToken;
-class MCAsmParserExtension;
-class MCContext;
-class MCExpr;
-class MCInst;
-class MCStreamer;
-class MCAsmInfo;
-class SourceMgr;
-class TargetAsmParser;
-class Twine;
-
-class AsmParser : public MCAsmParser {
- AsmParser(const AsmParser &); // DO NOT IMPLEMENT
- void operator=(const AsmParser &); // DO NOT IMPLEMENT
-private:
- AsmLexer Lexer;
- MCContext &Ctx;
- MCStreamer &Out;
- SourceMgr &SrcMgr;
- MCAsmParserExtension *GenericParser;
- MCAsmParserExtension *PlatformParser;
- TargetAsmParser *TargetParser;
-
- /// This is the current buffer index we're lexing from as managed by the
- /// SourceMgr object.
- int CurBuffer;
-
- AsmCond TheCondState;
- std::vector<AsmCond> TheCondStack;
-
- /// DirectiveMap - This is a table handlers for directives. Each handler is
- /// invoked after the directive identifier is read and is responsible for
- /// parsing and validating the rest of the directive. The handler is passed
- /// in the directive name and the location of the directive keyword.
- StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
-public:
- AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
- const MCAsmInfo &MAI);
- ~AsmParser();
-
- bool Run(bool NoInitialTextSection, bool NoFinalize = false);
-
- void AddDirectiveHandler(MCAsmParserExtension *Object,
- StringRef Directive,
- DirectiveHandler Handler) {
- DirectiveMap[Directive] = std::make_pair(Object, Handler);
- }
-
-public:
- TargetAsmParser &getTargetParser() const { return *TargetParser; }
- void setTargetParser(TargetAsmParser &P);
-
- /// @name MCAsmParser Interface
- /// {
-
- virtual SourceMgr &getSourceManager() { return SrcMgr; }
- virtual MCAsmLexer &getLexer() { return Lexer; }
- virtual MCContext &getContext() { return Ctx; }
- virtual MCStreamer &getStreamer() { return Out; }
-
- virtual void Warning(SMLoc L, const Twine &Meg);
- virtual bool Error(SMLoc L, const Twine &Msg);
-
- const AsmToken &Lex();
-
- bool ParseExpression(const MCExpr *&Res);
- virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
- virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
- virtual bool ParseAbsoluteExpression(int64_t &Res);
-
- /// }
-
-private:
- bool ParseStatement();
-
- void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
-
- /// EnterIncludeFile - Enter the specified file. This returns true on failure.
- bool EnterIncludeFile(const std::string &Filename);
-
- void EatToEndOfStatement();
-
- bool ParseAssignment(StringRef Name);
-
- bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
- bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
- bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
-
- /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
- /// and set \arg Res to the identifier contents.
- bool ParseIdentifier(StringRef &Res);
-
- // Directive Parsing.
- bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
- bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
- bool ParseDirectiveFill(); // ".fill"
- bool ParseDirectiveSpace(); // ".space"
- bool ParseDirectiveSet(); // ".set"
- bool ParseDirectiveOrg(); // ".org"
- // ".align{,32}", ".p2align{,w,l}"
- bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
-
- /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
- /// accepts a single symbol (which should be a label or an external).
- bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
- bool ParseDirectiveELFType(); // ELF specific ".type"
-
- bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
-
- bool ParseDirectiveAbort(); // ".abort"
- bool ParseDirectiveInclude(); // ".include"
-
- bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
- bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
- bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
- bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
-
- /// ParseEscapedString - Parse the current token as a string which may include
- /// escaped characters and return the string contents.
- bool ParseEscapedString(std::string &Data);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index d0ccd0f61748..b37d46cc5a25 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -14,6 +14,7 @@
namespace llvm {
class AsmToken;
+class MCAsmInfo;
class MCAsmLexer;
class MCAsmParserExtension;
class MCContext;
@@ -22,17 +23,24 @@ class MCStreamer;
class SMLoc;
class SourceMgr;
class StringRef;
+class Target;
+class TargetAsmParser;
class Twine;
/// MCAsmParser - Generic assembler parser interface, for use by target specific
/// assembly parsers.
class MCAsmParser {
public:
- typedef bool (MCAsmParserExtension::*DirectiveHandler)(StringRef, SMLoc);
+ typedef bool (*DirectiveHandler)(MCAsmParserExtension*, StringRef, SMLoc);
private:
MCAsmParser(const MCAsmParser &); // DO NOT IMPLEMENT
void operator=(const MCAsmParser &); // DO NOT IMPLEMENT
+
+ TargetAsmParser *TargetParser;
+
+ unsigned ShowParsedOperands : 1;
+
protected: // Can only create subclasses.
MCAsmParser();
@@ -52,6 +60,15 @@ public:
/// getStreamer - Return the output streamer for the assembler.
virtual MCStreamer &getStreamer() = 0;
+ TargetAsmParser &getTargetParser() const { return *TargetParser; }
+ void setTargetParser(TargetAsmParser &P);
+
+ bool getShowParsedOperands() const { return ShowParsedOperands; }
+ void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
+
+ /// Run - Run the parser on the input source buffer.
+ virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
+
/// Warning - Emit a warning at the location \arg L, with the message \arg
/// Msg.
virtual void Warning(SMLoc L, const Twine &Msg) = 0;
@@ -71,12 +88,17 @@ public:
const AsmToken &getTok();
/// \brief Report an error at the current lexer location.
- bool TokError(const char *Msg);
+ bool TokError(const Twine &Msg);
/// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
/// and set \arg Res to the identifier contents.
virtual bool ParseIdentifier(StringRef &Res) = 0;
+ /// \brief Parse up to the end of statement and return the contents from the
+ /// current token until the end of the statement; the current token on exit
+ /// will be either the EndOfStatement or EOF.
+ virtual StringRef ParseStringToEndOfStatement() = 0;
+
/// ParseExpression - Parse an arbitrary expression.
///
/// @param Res - The value of the expression. The result is undefined
@@ -102,6 +124,10 @@ public:
virtual bool ParseAbsoluteExpression(int64_t &Res) = 0;
};
+/// \brief Create an MCAsmParser instance.
+MCAsmParser *createMCAsmParser(const Target &, SourceMgr &, MCContext &,
+ MCStreamer &, const MCAsmInfo &);
+
} // End llvm namespace
#endif
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index ad9ccf79d12a..95184cdfcf32 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -11,9 +11,11 @@
#define LLVM_MC_MCASMPARSEREXTENSION_H
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"
namespace llvm {
+class Twine;
/// \brief Generic interface for extending the MCAsmParser,
/// which is implemented by target and object file assembly parser
@@ -27,6 +29,15 @@ class MCAsmParserExtension {
protected:
MCAsmParserExtension();
+ // Helper template for implementing static dispatch functions.
+ template<typename T, bool (T::*Handler)(StringRef, SMLoc)>
+ static bool HandleDirective(MCAsmParserExtension *Target,
+ StringRef Directive,
+ SMLoc DirectiveLoc) {
+ T *Obj = static_cast<T*>(Target);
+ return (Obj->*Handler)(Directive, DirectiveLoc);
+ }
+
public:
virtual ~MCAsmParserExtension();
@@ -49,15 +60,14 @@ public:
bool Error(SMLoc L, const Twine &Msg) {
return getParser().Error(L, Msg);
}
+ bool TokError(const Twine &Msg) {
+ return getParser().TokError(Msg);
+ }
const AsmToken &Lex() { return getParser().Lex(); }
const AsmToken &getTok() { return getParser().getTok(); }
- bool TokError(const char *Msg) {
- return getParser().TokError(Msg);
- }
-
/// @}
};
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 7c2f5beb7473..99fa5adae977 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -12,6 +12,7 @@
namespace llvm {
class SMLoc;
+class raw_ostream;
/// MCParsedAsmOperand - This abstract class represents a source-level assembly
/// instruction operand. It should be subclassed by target-specific code. This
@@ -23,9 +24,12 @@ public:
virtual ~MCParsedAsmOperand() {}
/// getStartLoc - Get the location of the first token of this operand.
- virtual SMLoc getStartLoc() const;
+ virtual SMLoc getStartLoc() const = 0;
/// getEndLoc - Get the location of the last token of this operand.
- virtual SMLoc getEndLoc() const;
+ virtual SMLoc getEndLoc() const = 0;
+
+ /// dump - Print a debug representation of the operand to the given stream.
+ virtual void dump(raw_ostream &OS) const = 0;
};
} // end namespace llvm.
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 5fe817180691..5de0bf58fe0c 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -35,13 +35,18 @@ class MCSectionELF : public MCSection {
/// IsExplicit - Indicates that this section comes from globals with an
/// explicit section specified.
bool IsExplicit;
+
+ /// EntrySize - The size of each entry in this section. This size only
+ /// makes sense for sections that contain fixed-sized entries. If a
+ /// section does not contain fixed-sized entries 'EntrySize' will be 0.
+ unsigned EntrySize;
private:
friend class MCContext;
MCSectionELF(StringRef Section, unsigned type, unsigned flags,
- SectionKind K, bool isExplicit)
+ SectionKind K, bool isExplicit, unsigned entrySize)
: MCSection(SV_ELF, K), SectionName(Section), Type(type), Flags(flags),
- IsExplicit(isExplicit) {}
+ IsExplicit(isExplicit), EntrySize(entrySize) {}
~MCSectionELF();
public:
@@ -169,6 +174,7 @@ public:
StringRef getSectionName() const { return SectionName; }
unsigned getType() const { return Type; }
unsigned getFlags() const { return Flags; }
+ unsigned getEntrySize() const { return EntrySize; }
void PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS) const;
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index aca7dd3a3957..1ce1b0e09d4a 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -54,6 +54,10 @@ namespace llvm {
/// kept up to date by SwitchSection.
const MCSection *CurSection;
+ /// PrevSection - This is the previous section code is being emitted to, it is
+ /// kept up to date by SwitchSection.
+ const MCSection *PrevSection;
+
public:
virtual ~MCStreamer();
@@ -96,6 +100,10 @@ namespace llvm {
/// emitting code to.
const MCSection *getCurrentSection() const { return CurSection; }
+ /// getPreviousSection - Return the previous section that the streamer is
+ /// emitting code to.
+ const MCSection *getPreviousSection() const { return PrevSection; }
+
/// SwitchSection - Set the current section where code is being emitted to
/// @p Section. This is required to update CurSection.
///
@@ -217,12 +225,13 @@ namespace llvm {
/// @param Size - The size of the integer (in bytes) to emit. This must
/// match a native machine width.
virtual void EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) = 0;
+ unsigned AddrSpace = 0) = 0;
/// EmitIntValue - Special case of EmitValue that avoids the client having
/// to pass in a MCExpr for constant integers.
- virtual void EmitIntValue(uint64_t Value, unsigned Size,unsigned AddrSpace);
-
+ virtual void EmitIntValue(uint64_t Value, unsigned Size,
+ unsigned AddrSpace = 0);
+
/// EmitSymbolValue - Special case of EmitValue that avoids the client
/// having to pass in a MCExpr for MCSymbols.
virtual void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
@@ -331,7 +340,7 @@ namespace llvm {
/// InstPrint.
///
/// \param CE - If given, a code emitter to use to show the instruction
- /// encoding inline with the assembly.
+ /// encoding inline with the assembly. This method takes ownership of \arg CE.
///
/// \param ShowInst - Whether to show the MCInst representation inline with
/// the assembly.
@@ -343,15 +352,26 @@ namespace llvm {
/// createMachOStreamer - Create a machine code streamer which will generate
/// Mach-O format object files.
+ ///
+ /// Takes ownership of \arg TAB and \arg CE.
MCStreamer *createMachOStreamer(MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *CE,
bool RelaxAll = false);
/// createWinCOFFStreamer - Create a machine code streamer which will
/// generate Microsoft COFF format object files.
+ ///
+ /// Takes ownership of \arg TAB and \arg CE.
MCStreamer *createWinCOFFStreamer(MCContext &Ctx,
TargetAsmBackend &TAB,
- MCCodeEmitter &CE, raw_ostream &OS);
+ MCCodeEmitter &CE, raw_ostream &OS,
+ bool RelaxAll = false);
+
+ /// createELFStreamer - Create a machine code streamer which will generate
+ /// ELF format object files.
+ MCStreamer *createELFStreamer(MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll = false);
/// createLoggingStreamer - Create a machine code streamer which just logs the
/// API calls and then dispatches to another streamer.
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index 64ab6b7583ed..f5a80a3dced2 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -38,7 +38,6 @@ class MDString : public Value {
MDString(const MDString &); // DO NOT IMPLEMENT
StringRef Str;
-protected:
explicit MDString(LLVMContext &C, StringRef S);
public:
@@ -111,9 +110,8 @@ class MDNode : public Value, public FoldingSetNode {
void replaceOperand(MDNodeOperand *Op, Value *NewVal);
~MDNode();
-protected:
- explicit MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
- bool isFunctionLocal);
+ MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
+ bool isFunctionLocal);
static MDNode *getMDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals,
FunctionLocalness FL, bool Insert = true);
@@ -128,6 +126,16 @@ public:
static MDNode *getIfExists(LLVMContext &Context, Value *const *Vals,
unsigned NumVals);
+
+ /// getTemporary - Return a temporary MDNode, for use in constructing
+ /// cyclic MDNode structures. A temporary MDNode is not uniqued,
+ /// may be RAUW'd, and must be manually deleted with deleteTemporary.
+ static MDNode *getTemporary(LLVMContext &Context, Value *const *Vals,
+ unsigned NumVals);
+
+ /// deleteTemporary - Deallocate a node created by getTemporary. The
+ /// node must not have any users.
+ static void deleteTemporary(MDNode *N);
/// getOperand - Return specified operand.
Value *getOperand(unsigned i) const;
@@ -149,9 +157,6 @@ public:
// critical code because it recursively visits all the MDNode's operands.
const Function *getFunction() const;
- // destroy - Delete this node. Only when there are no uses.
- void destroy();
-
/// Profile - calculate a unique identifier for this MDNode to collapse
/// duplicates
void Profile(FoldingSetNodeID &ID) const;
@@ -162,6 +167,9 @@ public:
return V->getValueID() == MDNodeVal;
}
private:
+ // destroy - Delete this node. Only when there are no uses.
+ void destroy();
+
bool isNotUniqued() const {
return (getSubclassDataFromValue() & NotUniquedBit) != 0;
}
@@ -175,31 +183,25 @@ private:
};
//===----------------------------------------------------------------------===//
-/// NamedMDNode - a tuple of MDNodes.
-/// NamedMDNode is always named. All NamedMDNode operand has a type of metadata.
-class NamedMDNode : public Value, public ilist_node<NamedMDNode> {
+/// NamedMDNode - a tuple of MDNodes. Despite its name, a NamedMDNode isn't
+/// itself an MDNode. NamedMDNodes belong to modules, have names, and contain
+/// lists of MDNodes.
+class NamedMDNode : public ilist_node<NamedMDNode> {
friend class SymbolTableListTraits<NamedMDNode, Module>;
friend struct ilist_traits<NamedMDNode>;
friend class LLVMContextImpl;
+ friend class Module;
NamedMDNode(const NamedMDNode &); // DO NOT IMPLEMENT
std::string Name;
Module *Parent;
- void *Operands; // SmallVector<WeakVH<MDNode>, 4>
+ void *Operands; // SmallVector<TrackingVH<MDNode>, 4>
void setParent(Module *M) { Parent = M; }
-protected:
- explicit NamedMDNode(LLVMContext &C, const Twine &N, MDNode*const *Vals,
- unsigned NumVals, Module *M = 0);
-public:
- static NamedMDNode *Create(LLVMContext &C, const Twine &N,
- MDNode *const *MDs,
- unsigned NumMDs, Module *M = 0) {
- return new NamedMDNode(C, N, MDs, NumMDs, M);
- }
- static NamedMDNode *Create(const NamedMDNode *NMD, Module *M = 0);
+ explicit NamedMDNode(const Twine &N);
+public:
/// eraseFromParent - Drop all references and remove the node from parent
/// module.
void eraseFromParent();
@@ -223,17 +225,11 @@ public:
/// addOperand - Add metadata operand.
void addOperand(MDNode *M);
- /// setName - Set the name of this named metadata.
- void setName(const Twine &NewName);
-
/// getName - Return a constant reference to this named metadata's name.
StringRef getName() const;
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const NamedMDNode *) { return true; }
- static bool classof(const Value *V) {
- return V->getValueID() == NamedMDNodeVal;
- }
+ /// print - Implement operator<< on NamedMDNode.
+ void print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW = 0) const;
};
} // end llvm namespace
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index 5fc0418759af..b7880ca2cb76 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -28,7 +28,6 @@ namespace llvm {
class FunctionType;
class GVMaterializer;
class LLVMContext;
-class MDSymbolTable;
template<> struct ilist_traits<Function>
: public SymbolTableListTraits<Function, Module> {
@@ -61,7 +60,7 @@ template<> struct ilist_traits<GlobalAlias>
};
template<> struct ilist_traits<NamedMDNode>
- : public SymbolTableListTraits<NamedMDNode, Module> {
+ : public ilist_default_traits<NamedMDNode> {
// createSentinel is used to get hold of a node that marks the end of
// the list...
NamedMDNode *createSentinel() const {
@@ -72,8 +71,8 @@ template<> struct ilist_traits<NamedMDNode>
NamedMDNode *provideInitialHead() const { return createSentinel(); }
NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); }
static void noteHead(NamedMDNode*, NamedMDNode*) {}
- void addNodeToList(NamedMDNode *N);
- void removeNodeFromList(NamedMDNode *N);
+ void addNodeToList(NamedMDNode *) {}
+ void removeNodeFromList(NamedMDNode *) {}
private:
mutable ilist_node<NamedMDNode> Sentinel;
};
@@ -100,7 +99,7 @@ public:
/// The type for the list of aliases.
typedef iplist<GlobalAlias> AliasListType;
/// The type for the list of named metadata.
- typedef iplist<NamedMDNode> NamedMDListType;
+ typedef ilist<NamedMDNode> NamedMDListType;
/// The type for the list of dependent libraries.
typedef std::vector<std::string> LibraryListType;
@@ -151,7 +150,7 @@ private:
std::string ModuleID; ///< Human readable identifier for the module
std::string TargetTriple; ///< Platform target triple Module compiled on
std::string DataLayout; ///< Target data description
- MDSymbolTable *NamedMDSymTab; ///< NamedMDNode names.
+ void *NamedMDSymTab; ///< NamedMDNode names.
friend class Constant;
@@ -237,8 +236,7 @@ public:
unsigned getMDKindID(StringRef Name) const;
/// getMDKindNames - Populate client supplied SmallVector with the name for
- /// custom metadata IDs registered in this LLVMContext. ID #0 is not used,
- /// so it is filled in as an empty string.
+ /// custom metadata IDs registered in this LLVMContext.
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
/// @}
@@ -332,6 +330,10 @@ public:
/// NamedMDNode with the specified name is not found.
NamedMDNode *getOrInsertNamedMetadata(StringRef Name);
+ /// eraseNamedMetadata - Remove the given NamedMDNode from this module
+ /// and delete it.
+ void eraseNamedMetadata(NamedMDNode *NMD);
+
/// @}
/// @name Type Accessors
/// @{
@@ -418,13 +420,6 @@ public:
static iplist<GlobalAlias> Module::*getSublistAccess(GlobalAlias*) {
return &Module::AliasList;
}
- /// Get the Module's list of named metadata (constant).
- const NamedMDListType &getNamedMDList() const { return NamedMDList; }
- /// Get the Module's list of named metadata.
- NamedMDListType &getNamedMDList() { return NamedMDList; }
- static iplist<NamedMDNode> Module::*getSublistAccess(NamedMDNode *) {
- return &Module::NamedMDList;
- }
/// Get the symbol table of global variable and function identifiers
const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
/// Get the Module's symbol table of global variable and function identifiers.
@@ -433,10 +428,6 @@ public:
const TypeSymbolTable &getTypeSymbolTable() const { return *TypeSymTab; }
/// Get the Module's symbol table of types
TypeSymbolTable &getTypeSymbolTable() { return *TypeSymTab; }
- /// Get the symbol table of named metadata
- const MDSymbolTable &getMDSymbolTable() const { return *NamedMDSymTab; }
- /// Get the Module's symbol table of named metadata
- MDSymbolTable &getMDSymbolTable() { return *NamedMDSymTab; }
/// @}
/// @name Global Variable Iteration
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 5a5893140e05..f4c6eed2cf9a 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -29,11 +29,7 @@
#ifndef LLVM_PASS_H
#define LLVM_PASS_H
-#include "llvm/System/DataTypes.h"
-
#include <string>
-#include <utility>
-#include <vector>
namespace llvm {
@@ -50,7 +46,7 @@ class raw_ostream;
class StringRef;
// AnalysisID - Use the PassInfo to identify a pass...
-typedef const PassInfo* AnalysisID;
+typedef const void* AnalysisID;
/// Different types of internal pass managers. External pass managers
/// (PassManager and FunctionPassManager) are not represented here.
@@ -82,14 +78,13 @@ enum PassKind {
///
class Pass {
AnalysisResolver *Resolver; // Used to resolve analysis
- intptr_t PassID;
+ const void *PassID;
PassKind Kind;
void operator=(const Pass&); // DO NOT IMPLEMENT
Pass(const Pass &); // DO NOT IMPLEMENT
public:
- explicit Pass(PassKind K, intptr_t pid);
- explicit Pass(PassKind K, const void *pid);
+ explicit Pass(PassKind K, char &pid);
virtual ~Pass();
@@ -101,10 +96,10 @@ public:
///
virtual const char *getPassName() const;
- /// getPassInfo - Return the PassInfo data structure that corresponds to this
- /// pass... If the pass has not been registered, this will return null.
- ///
- const PassInfo *getPassInfo() const;
+ /// getPassID - Return the PassID number that corresponds to this pass.
+ virtual AnalysisID getPassID() const {
+ return PassID;
+ }
/// print - Print out the internal state of the pass. This is called by
/// Analyze to print out the contents of an analysis. Otherwise it is not
@@ -124,7 +119,7 @@ public:
/// Each pass is responsible for assigning a pass manager to itself.
/// PMS is the stack of available pass manager.
virtual void assignPassManager(PMStack &,
- PassManagerType = PMT_Unknown) {}
+ PassManagerType) {}
/// Check if available pass managers are suitable for this pass or not.
virtual void preparePassManager(PMStack &);
@@ -159,7 +154,7 @@ public:
/// an analysis interface through multiple inheritance. If needed, it should
/// override this to adjust the this pointer as needed for the specified pass
/// info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *);
+ virtual void *getAdjustedAnalysisPointer(AnalysisID ID);
virtual ImmutablePass *getAsImmutablePass();
virtual PMDataManager *getAsPMDataManager();
@@ -170,14 +165,9 @@ public:
// dumpPassStructure - Implement the -debug-passes=PassStructure option
virtual void dumpPassStructure(unsigned Offset = 0);
- template<typename AnalysisClass>
- static const PassInfo *getClassPassInfo() {
- return lookupPassInfo(intptr_t(&AnalysisClass::ID));
- }
-
// lookupPassInfo - Return the pass info object for the specified pass class,
// or null if it is not known.
- static const PassInfo *lookupPassInfo(intptr_t TI);
+ static const PassInfo *lookupPassInfo(const void *TI);
// lookupPassInfo - Return the pass info object for the pass with the given
// argument string, or null if it is not known.
@@ -200,7 +190,7 @@ public:
/// don't have the class name available (use getAnalysisIfAvailable if you
/// do), but it can tell you if you need to preserve the pass at least.
///
- bool mustPreserveAnalysisID(const PassInfo *AnalysisID) const;
+ bool mustPreserveAnalysisID(char &AID) const;
/// getAnalysis<AnalysisType>() - This function is used by subclasses to get
/// to the analysis information that they claim to use by overriding the
@@ -213,10 +203,10 @@ public:
AnalysisType &getAnalysis(Function &F); // Defined in PassAnalysisSupport.h
template<typename AnalysisType>
- AnalysisType &getAnalysisID(const PassInfo *PI) const;
+ AnalysisType &getAnalysisID(AnalysisID PI) const;
template<typename AnalysisType>
- AnalysisType &getAnalysisID(const PassInfo *PI, Function &F);
+ AnalysisType &getAnalysisID(AnalysisID PI, Function &F);
};
@@ -235,13 +225,12 @@ public:
virtual bool runOnModule(Module &M) = 0;
virtual void assignPassManager(PMStack &PMS,
- PassManagerType T = PMT_ModulePassManager);
+ PassManagerType T);
/// Return what kind of Pass Manager can manage this pass.
virtual PassManagerType getPotentialPassManagerType() const;
- explicit ModulePass(intptr_t pid) : Pass(PT_Module, pid) {}
- explicit ModulePass(const void *pid) : Pass(PT_Module, pid) {}
+ explicit ModulePass(char &pid) : Pass(PT_Module, pid) {}
// Force out-of-line virtual method.
virtual ~ModulePass();
};
@@ -268,8 +257,7 @@ public:
///
bool runOnModule(Module &) { return false; }
- explicit ImmutablePass(intptr_t pid) : ModulePass(pid) {}
- explicit ImmutablePass(const void *pid)
+ explicit ImmutablePass(char &pid)
: ModulePass(pid) {}
// Force out-of-line virtual method.
@@ -287,8 +275,7 @@ public:
///
class FunctionPass : public Pass {
public:
- explicit FunctionPass(intptr_t pid) : Pass(PT_Function, pid) {}
- explicit FunctionPass(const void *pid) : Pass(PT_Function, pid) {}
+ explicit FunctionPass(char &pid) : Pass(PT_Function, pid) {}
/// createPrinterPass - Get a function printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
@@ -308,19 +295,8 @@ public:
///
virtual bool doFinalization(Module &);
- /// runOnModule - On a module, we run this pass by initializing,
- /// ronOnFunction'ing once for every function in the module, then by
- /// finalizing.
- ///
- virtual bool runOnModule(Module &M);
-
- /// run - On a function, we simply initialize, run the function, then
- /// finalize.
- ///
- bool run(Function &F);
-
virtual void assignPassManager(PMStack &PMS,
- PassManagerType T = PMT_FunctionPassManager);
+ PassManagerType T);
/// Return what kind of Pass Manager can manage this pass.
virtual PassManagerType getPotentialPassManagerType() const;
@@ -340,8 +316,7 @@ public:
///
class BasicBlockPass : public Pass {
public:
- explicit BasicBlockPass(intptr_t pid) : Pass(PT_BasicBlock, pid) {}
- explicit BasicBlockPass(const void *pid) : Pass(PT_BasicBlock, pid) {}
+ explicit BasicBlockPass(char &pid) : Pass(PT_BasicBlock, pid) {}
/// createPrinterPass - Get a function printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
@@ -371,14 +346,8 @@ public:
///
virtual bool doFinalization(Module &);
-
- // To run this pass on a function, we simply call runOnBasicBlock once for
- // each function.
- //
- bool runOnFunction(Function &F);
-
virtual void assignPassManager(PMStack &PMS,
- PassManagerType T = PMT_BasicBlockPassManager);
+ PassManagerType T);
/// Return what kind of Pass Manager can manage this pass.
virtual PassManagerType getPotentialPassManagerType() const;
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index 977d4f4e30d3..a3342d51386b 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -19,7 +19,6 @@
#ifndef LLVM_PASS_ANALYSIS_SUPPORT_H
#define LLVM_PASS_ANALYSIS_SUPPORT_H
-#include "llvm/Pass.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <vector>
@@ -49,34 +48,37 @@ public:
// addRequired - Add the specified ID to the required set of the usage info
// for a pass.
//
- AnalysisUsage &addRequiredID(AnalysisID ID);
+ AnalysisUsage &addRequiredID(const void *ID);
+ AnalysisUsage &addRequiredID(char &ID);
template<class PassClass>
AnalysisUsage &addRequired() {
- return addRequiredID(Pass::getClassPassInfo<PassClass>());
+ return addRequiredID(PassClass::ID);
}
- AnalysisUsage &addRequiredTransitiveID(AnalysisID ID);
+ AnalysisUsage &addRequiredTransitiveID(char &ID);
template<class PassClass>
AnalysisUsage &addRequiredTransitive() {
- AnalysisID ID = Pass::getClassPassInfo<PassClass>();
- return addRequiredTransitiveID(ID);
+ return addRequiredTransitiveID(PassClass::ID);
}
// addPreserved - Add the specified ID to the set of analyses preserved by
// this pass
//
- AnalysisUsage &addPreservedID(AnalysisID ID) {
+ AnalysisUsage &addPreservedID(const void *ID) {
Preserved.push_back(ID);
return *this;
}
+ AnalysisUsage &addPreservedID(char &ID) {
+ Preserved.push_back(&ID);
+ return *this;
+ }
// addPreserved - Add the specified Pass class to the set of analyses
// preserved by this pass.
//
template<class PassClass>
AnalysisUsage &addPreserved() {
- assert(Pass::getClassPassInfo<PassClass>() && "Pass class not registered!");
- Preserved.push_back(Pass::getClassPassInfo<PassClass>());
+ Preserved.push_back(&PassClass::ID);
return *this;
}
@@ -85,12 +87,7 @@ public:
// This can be useful when a pass is trivially preserved, but may not be
// linked in. Be careful about spelling!
//
- AnalysisUsage &addPreserved(StringRef Arg) {
- const PassInfo *PI = Pass::lookupPassInfo(Arg);
- // If the pass exists, preserve it. Otherwise silently do nothing.
- if (PI) Preserved.push_back(PI);
- return *this;
- }
+ AnalysisUsage &addPreserved(StringRef Arg);
// setPreservesAll - Set by analyses that do not transform their input at all
void setPreservesAll() { PreservesAll = true; }
@@ -130,7 +127,7 @@ public:
inline PMDataManager &getPMDataManager() { return PM; }
// Find pass that is implementing PI.
- Pass *findImplPass(const PassInfo *PI) {
+ Pass *findImplPass(AnalysisID PI) {
Pass *ResultPass = 0;
for (unsigned i = 0; i < AnalysisImpls.size() ; ++i) {
if (AnalysisImpls[i].first == PI) {
@@ -142,10 +139,10 @@ public:
}
// Find pass that is implementing PI. Initialize pass for Function F.
- Pass *findImplPass(Pass *P, const PassInfo *PI, Function &F);
+ Pass *findImplPass(Pass *P, AnalysisID PI, Function &F);
- void addAnalysisImplsPair(const PassInfo *PI, Pass *P) {
- std::pair<const PassInfo*, Pass*> pir = std::make_pair(PI,P);
+ void addAnalysisImplsPair(AnalysisID PI, Pass *P) {
+ std::pair<AnalysisID, Pass*> pir = std::make_pair(PI,P);
AnalysisImpls.push_back(pir);
}
@@ -158,11 +155,11 @@ public:
// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist
Pass *getAnalysisIfAvailable(AnalysisID ID, bool Direction) const;
+private:
// AnalysisImpls - This keeps track of which passes implements the interfaces
// that are required by the current pass (to implement getAnalysis()).
- std::vector<std::pair<const PassInfo*, Pass*> > AnalysisImpls;
+ std::vector<std::pair<AnalysisID, Pass*> > AnalysisImpls;
-private:
// PassManager that is used to resolve analysis info
PMDataManager &PM;
};
@@ -179,8 +176,7 @@ template<typename AnalysisType>
AnalysisType *Pass::getAnalysisIfAvailable() const {
assert(Resolver && "Pass not resident in a PassManager object!");
- const PassInfo *PI = getClassPassInfo<AnalysisType>();
- if (PI == 0) return 0;
+ const void *PI = &AnalysisType::ID;
Pass *ResultPass = Resolver->getAnalysisIfAvailable(PI, true);
if (ResultPass == 0) return 0;
@@ -199,11 +195,11 @@ AnalysisType *Pass::getAnalysisIfAvailable() const {
template<typename AnalysisType>
AnalysisType &Pass::getAnalysis() const {
assert(Resolver && "Pass has not been inserted into a PassManager object!");
- return getAnalysisID<AnalysisType>(getClassPassInfo<AnalysisType>());
+ return getAnalysisID<AnalysisType>(&AnalysisType::ID);
}
template<typename AnalysisType>
-AnalysisType &Pass::getAnalysisID(const PassInfo *PI) const {
+AnalysisType &Pass::getAnalysisID(AnalysisID PI) const {
assert(PI && "getAnalysis for unregistered pass!");
assert(Resolver&&"Pass has not been inserted into a PassManager object!");
// PI *must* appear in AnalysisImpls. Because the number of passes used
@@ -229,11 +225,11 @@ template<typename AnalysisType>
AnalysisType &Pass::getAnalysis(Function &F) {
assert(Resolver &&"Pass has not been inserted into a PassManager object!");
- return getAnalysisID<AnalysisType>(getClassPassInfo<AnalysisType>(), F);
+ return getAnalysisID<AnalysisType>(&AnalysisType::ID, F);
}
template<typename AnalysisType>
-AnalysisType &Pass::getAnalysisID(const PassInfo *PI, Function &F) {
+AnalysisType &Pass::getAnalysisID(AnalysisID PI, Function &F) {
assert(PI && "getAnalysis for unregistered pass!");
assert(Resolver && "Pass has not been inserted into a PassManager object!");
// PI *must* appear in AnalysisImpls. Because the number of passes used
diff --git a/include/llvm/PassManager.h b/include/llvm/PassManager.h
index 8de0f8342d78..c8b5dcaf0f2d 100644
--- a/include/llvm/PassManager.h
+++ b/include/llvm/PassManager.h
@@ -22,7 +22,6 @@
namespace llvm {
class Pass;
-class ModulePass;
class Module;
class PassManagerImpl;
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index 81b7e7af816e..17f4a0592fbb 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -14,11 +14,11 @@
#ifndef LLVM_PASSMANAGERS_H
#define LLVM_PASSMANAGERS_H
-#include "llvm/PassManager.h"
+#include "llvm/Pass.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/DenseMap.h"
-#include <deque>
+#include <vector>
#include <map>
//===----------------------------------------------------------------------===//
@@ -96,14 +96,8 @@ namespace llvm {
class StringRef;
class Value;
class Timer;
+ class PMDataManager;
-/// FunctionPassManager and PassManager, two top level managers, serve
-/// as the public interface of pass manager infrastructure.
-enum TopLevelManagerType {
- TLM_Function, // FunctionPassManager
- TLM_Pass // PassManager
-};
-
// enums for debugging strings
enum PassDebuggingString {
EXECUTION_MSG, // "Executing Pass '"
@@ -138,30 +132,28 @@ public:
//===----------------------------------------------------------------------===//
// PMStack
//
-/// PMStack
+/// PMStack - This class implements a stack data structure of PMDataManager
+/// pointers.
+///
/// Top level pass managers (see PassManager.cpp) maintain active Pass Managers
/// using PMStack. Each Pass implements assignPassManager() to connect itself
/// with appropriate manager. assignPassManager() walks PMStack to find
/// suitable manager.
-///
-/// PMStack is just a wrapper around standard deque that overrides pop() and
-/// push() methods.
class PMStack {
public:
- typedef std::deque<PMDataManager *>::reverse_iterator iterator;
- iterator begin() { return S.rbegin(); }
- iterator end() { return S.rend(); }
-
- void handleLastUserOverflow();
+ typedef std::vector<PMDataManager *>::const_reverse_iterator iterator;
+ iterator begin() const { return S.rbegin(); }
+ iterator end() const { return S.rend(); }
void pop();
- inline PMDataManager *top() { return S.back(); }
+ PMDataManager *top() const { return S.back(); }
void push(PMDataManager *PM);
- inline bool empty() { return S.empty(); }
+ bool empty() const { return S.empty(); }
+
+ void dump() const;
- void dump();
private:
- std::deque<PMDataManager *> S;
+ std::vector<PMDataManager *> S;
};
@@ -171,21 +163,26 @@ private:
/// PMTopLevelManager manages LastUser info and collects common APIs used by
/// top level pass managers.
class PMTopLevelManager {
-public:
+protected:
+ explicit PMTopLevelManager(PMDataManager *PMDM);
virtual unsigned getNumContainedManagers() const {
return (unsigned)PassManagers.size();
}
- /// Schedule pass P for execution. Make sure that passes required by
- /// P are run before P is run. Update analysis info maintained by
- /// the manager. Remove dead passes. This is a recursive function.
- void schedulePass(Pass *P);
+ void initializeAllAnalysisInfo();
+private:
/// This is implemented by top level pass manager and used by
/// schedulePass() to add analysis info passes that are not available.
virtual void addTopLevelPass(Pass *P) = 0;
+public:
+ /// Schedule pass P for execution. Make sure that passes required by
+ /// P are run before P is run. Update analysis info maintained by
+ /// the manager. Remove dead passes. This is a recursive function.
+ void schedulePass(Pass *P);
+
/// Set pass P as the last user of the given analysis passes.
void setLastUser(SmallVector<Pass *, 12> &AnalysisPasses, Pass *P);
@@ -200,7 +197,6 @@ public:
/// Find analysis usage information for the pass P.
AnalysisUsage *findAnalysisUsage(Pass *P);
- explicit PMTopLevelManager(enum TopLevelManagerType t);
virtual ~PMTopLevelManager();
/// Add immutable pass and initialize it.
@@ -227,8 +223,6 @@ public:
void dumpPasses() const;
void dumpArguments() const;
- void initializeAllAnalysisInfo();
-
// Active Pass Managers
PMStack activeStack;
@@ -302,7 +296,7 @@ public:
/// through getAnalysis interface.
virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
- virtual Pass *getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F);
+ virtual Pass *getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F);
/// Initialize available analysis information.
void initializeAnalysisInfo() {
@@ -414,7 +408,7 @@ class FPPassManager : public ModulePass, public PMDataManager {
public:
static char ID;
explicit FPPassManager(int Depth)
- : ModulePass(&ID), PMDataManager(Depth) { }
+ : ModulePass(ID), PMDataManager(Depth) { }
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h
new file mode 100644
index 000000000000..59071391520a
--- /dev/null
+++ b/include/llvm/PassRegistry.h
@@ -0,0 +1,71 @@
+//===- llvm/PassRegistry.h - Pass Information Registry ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines PassRegistry, a class that is used in the initialization
+// and registration of passes. At initialization, passes are registered with
+// the PassRegistry, which is later provided to the PassManager for dependency
+// resolution and similar tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSREGISTRY_H
+#define LLVM_PASSREGISTRY_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/System/DataTypes.h"
+#include "llvm/System/Mutex.h"
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+class PassInfo;
+struct PassRegistrationListener;
+
+class PassRegistry {
+ /// Guards the contents of this class.
+ mutable sys::SmartMutex<true> Lock;
+
+ /// PassInfoMap - Keep track of the PassInfo object for each registered pass.
+ typedef std::map<const void*, const PassInfo*> MapType;
+ MapType PassInfoMap;
+
+ typedef StringMap<const PassInfo*> StringMapType;
+ StringMapType PassInfoStringMap;
+
+ /// AnalysisGroupInfo - Keep track of information for each analysis group.
+ struct AnalysisGroupInfo {
+ std::set<const PassInfo *> Implementations;
+ };
+ std::map<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
+
+ std::vector<PassRegistrationListener*> Listeners;
+
+public:
+ static PassRegistry *getPassRegistry();
+
+ const PassInfo *getPassInfo(const void *TI) const;
+ const PassInfo *getPassInfo(StringRef Arg) const;
+
+ void registerPass(const PassInfo &PI);
+ void unregisterPass(const PassInfo &PI);
+
+ /// Analysis Group Mechanisms.
+ void registerAnalysisGroup(const void *InterfaceID, const void *PassID,
+ PassInfo& Registeree, bool isDefault);
+
+ void enumerateWith(PassRegistrationListener *L);
+ void addRegistrationListener(PassRegistrationListener* L);
+ void removeRegistrationListener(PassRegistrationListener *L);
+};
+
+}
+
+#endif
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index b0183513386d..0f559d6df736 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -22,11 +22,10 @@
#define LLVM_PASS_SUPPORT_H
#include "Pass.h"
+#include "llvm/PassRegistry.h"
namespace llvm {
-class TargetMachine;
-
//===---------------------------------------------------------------------------
/// PassInfo class - An instance of this class exists for every pass known by
/// the system, and can be obtained from a live Pass by calling its
@@ -40,7 +39,7 @@ public:
private:
const char *const PassName; // Nice name for Pass
const char *const PassArgument; // Command Line argument to run this pass
- const intptr_t PassID;
+ const void *PassID;
const bool IsCFGOnlyPass; // Pass only looks at the CFG.
const bool IsAnalysis; // True if an analysis pass.
const bool IsAnalysisGroup; // True if an analysis group.
@@ -51,18 +50,17 @@ private:
public:
/// PassInfo ctor - Do not call this directly, this should only be invoked
/// through RegisterPass.
- PassInfo(const char *name, const char *arg, intptr_t pi,
- NormalCtor_t normal = 0,
- bool isCFGOnly = false, bool is_analysis = false)
+ PassInfo(const char *name, const char *arg, const void *pi,
+ NormalCtor_t normal, bool isCFGOnly, bool is_analysis)
: PassName(name), PassArgument(arg), PassID(pi),
IsCFGOnlyPass(isCFGOnly),
IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal) {
- registerPass();
+ PassRegistry::getPassRegistry()->registerPass(*this);
}
/// PassInfo ctor - Do not call this directly, this should only be invoked
/// through RegisterPass. This version is for use by analysis groups; it
/// does not auto-register the pass.
- PassInfo(const char *name, intptr_t pi)
+ PassInfo(const char *name, const void *pi)
: PassName(name), PassArgument(""), PassID(pi),
IsCFGOnlyPass(false),
IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(0) {
@@ -80,11 +78,11 @@ public:
/// getTypeInfo - Return the id object for the pass...
/// TODO : Rename
- intptr_t getTypeInfo() const { return PassID; }
+ const void *getTypeInfo() const { return PassID; }
/// Return true if this PassID implements the specified ID pointer.
- bool isPassID(void *IDPtr) const {
- return PassID == (intptr_t)IDPtr;
+ bool isPassID(const void *IDPtr) const {
+ return PassID == IDPtr;
}
/// isAnalysisGroup - Return true if this is an analysis group, not a normal
@@ -126,15 +124,13 @@ public:
return ItfImpl;
}
-protected:
- void registerPass();
- void unregisterPass();
-
private:
void operator=(const PassInfo &); // do not implement
PassInfo(const PassInfo &); // do not implement
};
+#define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \
+ static RegisterPass<passName> passName ## _info(arg, name, cfg, analysis)
template<typename PassName>
Pass *callDefaultCtor() { return new PassName(); }
@@ -162,9 +158,10 @@ struct RegisterPass : public PassInfo {
// Register Pass using default constructor...
RegisterPass(const char *PassArg, const char *Name, bool CFGOnly = false,
bool is_analysis = false)
- : PassInfo(Name, PassArg, intptr_t(&passName::ID),
+ : PassInfo(Name, PassArg, &passName::ID,
PassInfo::NormalCtor_t(callDefaultCtor<passName>),
CFGOnly, is_analysis) {
+
}
};
@@ -191,8 +188,8 @@ struct RegisterPass : public PassInfo {
class RegisterAGBase : public PassInfo {
protected:
RegisterAGBase(const char *Name,
- intptr_t InterfaceID,
- intptr_t PassID = 0,
+ const void *InterfaceID,
+ const void *PassID = 0,
bool isDefault = false);
};
@@ -200,16 +197,18 @@ template<typename Interface, bool Default = false>
struct RegisterAnalysisGroup : public RegisterAGBase {
explicit RegisterAnalysisGroup(PassInfo &RPB)
: RegisterAGBase(RPB.getPassName(),
- intptr_t(&Interface::ID), RPB.getTypeInfo(),
+ &Interface::ID, RPB.getTypeInfo(),
Default) {
}
explicit RegisterAnalysisGroup(const char *Name)
- : RegisterAGBase(Name, intptr_t(&Interface::ID)) {
+ : RegisterAGBase(Name, &Interface::ID) {
}
};
-
+#define INITIALIZE_AG_PASS(passName, agName, arg, name, cfg, analysis, def) \
+ static RegisterPass<passName> passName ## _info(arg, name, cfg, analysis); \
+ static RegisterAnalysisGroup<agName, def> passName ## _ag(passName ## _info)
//===---------------------------------------------------------------------------
/// PassRegistrationListener class - This class is meant to be derived from by
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 69137bff71e6..78254ae9921f 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -48,6 +48,11 @@ namespace COFF {
uint16_t Characteristics;
};
+ enum MachineTypes {
+ IMAGE_FILE_MACHINE_I386 = 0x14C,
+ IMAGE_FILE_MACHINE_AMD64 = 0x8664
+ };
+
struct symbol {
char Name[NameSize];
uint32_t Value;
@@ -67,6 +72,12 @@ namespace COFF {
SF_WeakReference = 0x01000000
};
+ enum SymbolSectionNumber {
+ IMAGE_SYM_DEBUG = -2,
+ IMAGE_SYM_ABSOLUTE = -1,
+ IMAGE_SYM_UNDEFINED = 0
+ };
+
/// Storage class tells where and what the symbol represents
enum SymbolStorageClass {
IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function
@@ -128,7 +139,7 @@ namespace COFF {
IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type.
/// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT))
- SCT_COMPLEX_TYPE_SHIFT = 4
+ SCT_COMPLEX_TYPE_SHIFT = 8
};
struct section {
@@ -199,10 +210,28 @@ namespace COFF {
IMAGE_REL_I386_SECREL = 0x000B,
IMAGE_REL_I386_TOKEN = 0x000C,
IMAGE_REL_I386_SECREL7 = 0x000D,
- IMAGE_REL_I386_REL32 = 0x0014
+ IMAGE_REL_I386_REL32 = 0x0014,
+
+ IMAGE_REL_AMD64_ABSOLUTE = 0x0000,
+ IMAGE_REL_AMD64_ADDR64 = 0x0001,
+ IMAGE_REL_AMD64_ADDR32 = 0x0002,
+ IMAGE_REL_AMD64_ADDR32NB = 0x0003,
+ IMAGE_REL_AMD64_REL32 = 0x0004,
+ IMAGE_REL_AMD64_REL32_1 = 0x0005,
+ IMAGE_REL_AMD64_REL32_2 = 0x0006,
+ IMAGE_REL_AMD64_REL32_3 = 0x0007,
+ IMAGE_REL_AMD64_REL32_4 = 0x0008,
+ IMAGE_REL_AMD64_REL32_5 = 0x0009,
+ IMAGE_REL_AMD64_SECTION = 0x000A,
+ IMAGE_REL_AMD64_SECREL = 0x000B,
+ IMAGE_REL_AMD64_SECREL7 = 0x000C,
+ IMAGE_REL_AMD64_TOKEN = 0x000D,
+ IMAGE_REL_AMD64_SREL32 = 0x000E,
+ IMAGE_REL_AMD64_PAIR = 0x000F,
+ IMAGE_REL_AMD64_SSPAN32 = 0x0010
};
- enum {
+ enum COMDATType {
IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
IMAGE_COMDAT_SELECT_ANY,
IMAGE_COMDAT_SELECT_SAME_SIZE,
@@ -211,6 +240,58 @@ namespace COFF {
IMAGE_COMDAT_SELECT_LARGEST
};
+ // Auxiliary Symbol Formats
+ struct AuxiliaryFunctionDefinition {
+ uint32_t TagIndex;
+ uint32_t TotalSize;
+ uint32_t PointerToLinenumber;
+ uint32_t PointerToNextFunction;
+ uint8_t unused[2];
+ };
+
+ struct AuxiliarybfAndefSymbol {
+ uint8_t unused1[4];
+ uint16_t Linenumber;
+ uint8_t unused2[6];
+ uint32_t PointerToNextFunction;
+ uint8_t unused3[2];
+ };
+
+ struct AuxiliaryWeakExternal {
+ uint32_t TagIndex;
+ uint32_t Characteristics;
+ uint8_t unused[10];
+ };
+
+ /// These are not documented in the spec, but are located in WinNT.h.
+ enum WeakExternalCharacteristics {
+ IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1,
+ IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2,
+ IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3
+ };
+
+ struct AuxiliaryFile {
+ uint8_t FileName[18];
+ };
+
+ struct AuxiliarySectionDefinition {
+ uint32_t Length;
+ uint16_t NumberOfRelocations;
+ uint16_t NumberOfLinenumbers;
+ uint32_t CheckSum;
+ uint16_t Number;
+ uint8_t Selection;
+ uint8_t unused[3];
+ };
+
+ union Auxiliary {
+ AuxiliaryFunctionDefinition FunctionDefinition;
+ AuxiliarybfAndefSymbol bfAndefSymbol;
+ AuxiliaryWeakExternal WeakExternal;
+ AuxiliaryFile File;
+ AuxiliarySectionDefinition SectionDefinition;
+ };
+
} // End namespace llvm.
} // End namespace COFF.
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index 38ee08bedf82..9b6a4098b617 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -49,13 +49,13 @@ protected:
PointerIntPair<InstrTy*, 1, bool> I;
public:
CallSiteBase() : I(0, false) {}
- CallSiteBase(CallTy *CI) : I(reinterpret_cast<InstrTy*>(CI), true) {}
- CallSiteBase(InvokeTy *II) : I(reinterpret_cast<InstrTy*>(II), false) {}
+ CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); }
+ CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); }
CallSiteBase(ValTy *II) { *this = get(II); }
CallSiteBase(InstrTy *II) {
assert(II && "Null instruction given?");
*this = get(II);
- assert(I.getPointer());
+ assert(I.getPointer() && "Not a call?");
}
/// CallSiteBase::get - This static method is sort of like a constructor. It
@@ -66,9 +66,9 @@ public:
static CallSiteBase get(ValTy *V) {
if (InstrTy *II = dyn_cast<InstrTy>(V)) {
if (II->getOpcode() == Instruction::Call)
- return CallSiteBase(reinterpret_cast<CallTy*>(II));
+ return CallSiteBase(static_cast<CallTy*>(II));
else if (II->getOpcode() == Instruction::Invoke)
- return CallSiteBase(reinterpret_cast<InvokeTy*>(II));
+ return CallSiteBase(static_cast<InvokeTy*>(II));
}
return CallSiteBase();
}
@@ -116,13 +116,13 @@ public:
ValTy *getArgument(unsigned ArgNo) const {
assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
- return *(arg_begin()+ArgNo);
+ return *(arg_begin() + ArgNo);
}
void setArgument(unsigned ArgNo, Value* newVal) {
assert(getInstruction() && "Not a call or invoke instruction!");
assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!");
- getInstruction()->setOperand(getArgumentOffset() + ArgNo, newVal);
+ getInstruction()->setOperand(ArgNo, newVal);
}
/// Given a value use iterator, returns the argument that corresponds to it.
@@ -143,7 +143,7 @@ public:
IterTy arg_begin() const {
assert(getInstruction() && "Not a call or invoke instruction!");
// Skip non-arguments
- return (*this)->op_begin() + getArgumentOffset();
+ return (*this)->op_begin();
}
IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); }
@@ -253,44 +253,21 @@ public:
}
private:
- /// Returns the operand number of the first argument
- unsigned getArgumentOffset() const {
- if (isCall())
- return CallInst::ArgOffset; // Skip Function (ATM)
- else
- return 0; // Args are at the front
- }
-
unsigned getArgumentEndOffset() const {
if (isCall())
- return CallInst::ArgOffset ? 0 : 1; // Unchanged (ATM)
+ return 1; // Skip Callee
else
- return 3; // Skip BB, BB, Function
+ return 3; // Skip BB, BB, Callee
}
IterTy getCallee() const {
- // FIXME: this is slow, since we do not have the fast versions
- // of the op_*() functions here. See CallSite::getCallee.
- //
- if (isCall())
- return CallInst::ArgOffset
- ? getInstruction()->op_begin() // Unchanged
- : getInstruction()->op_end() - 1; // Skip Function
- else
- return getInstruction()->op_end() - 3; // Skip BB, BB, Function
+ if (isCall()) // Skip Callee
+ return cast<CallInst>(getInstruction())->op_end() - 1;
+ else // Skip BB, BB, Callee
+ return cast<InvokeInst>(getInstruction())->op_end() - 3;
}
};
-/// ImmutableCallSite - establish a view to a call site for examination
-class ImmutableCallSite : public CallSiteBase<> {
- typedef CallSiteBase<> Base;
-public:
- ImmutableCallSite(const Value* V) : Base(V) {}
- ImmutableCallSite(const CallInst *CI) : Base(CI) {}
- ImmutableCallSite(const InvokeInst *II) : Base(II) {}
- ImmutableCallSite(const Instruction *II) : Base(II) {}
-};
-
class CallSite : public CallSiteBase<Function, Value, User, Instruction,
CallInst, InvokeInst, User::op_iterator> {
typedef CallSiteBase<Function, Value, User, Instruction,
@@ -298,6 +275,7 @@ class CallSite : public CallSiteBase<Function, Value, User, Instruction,
public:
CallSite() {}
CallSite(Base B) : Base(B) {}
+ CallSite(Value* V) : Base(V) {}
CallSite(CallInst *CI) : Base(CI) {}
CallSite(InvokeInst *II) : Base(II) {}
CallSite(Instruction *II) : Base(II) {}
@@ -322,6 +300,17 @@ private:
User::op_iterator getCallee() const;
};
+/// ImmutableCallSite - establish a view to a call site for examination
+class ImmutableCallSite : public CallSiteBase<> {
+ typedef CallSiteBase<> Base;
+public:
+ ImmutableCallSite(const Value* V) : Base(V) {}
+ ImmutableCallSite(const CallInst *CI) : Base(CI) {}
+ ImmutableCallSite(const InvokeInst *II) : Base(II) {}
+ ImmutableCallSite(const Instruction *II) : Base(II) {}
+ ImmutableCallSite(CallSite CS) : Base(CS.getInstruction()) {}
+};
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h
index dccbfadfa305..c589171bbafe 100644
--- a/include/llvm/Support/Casting.h
+++ b/include/llvm/Support/Casting.h
@@ -236,73 +236,6 @@ inline typename cast_retty<X, Y>::ret_type dyn_cast_or_null(const Y &Val) {
return (Val && isa<X>(Val)) ? cast<X, Y>(Val) : 0;
}
-
-#ifdef DEBUG_CAST_OPERATORS
-#include "llvm/Support/raw_ostream.h"
-
-struct bar {
- bar() {}
-private:
- bar(const bar &);
-};
-struct foo {
- void ext() const;
- /* static bool classof(const bar *X) {
- cerr << "Classof: " << X << "\n";
- return true;
- }*/
-};
-
-template <> struct isa_impl<foo,bar> {
- static inline bool doit(const bar &Val) {
- dbgs() << "Classof: " << &Val << "\n";
- return true;
- }
-};
-
-
-bar *fub();
-void test(bar &B1, const bar *B2) {
- // test various configurations of const
- const bar &B3 = B1;
- const bar *const B4 = B2;
-
- // test isa
- if (!isa<foo>(B1)) return;
- if (!isa<foo>(B2)) return;
- if (!isa<foo>(B3)) return;
- if (!isa<foo>(B4)) return;
-
- // test cast
- foo &F1 = cast<foo>(B1);
- const foo *F3 = cast<foo>(B2);
- const foo *F4 = cast<foo>(B2);
- const foo &F8 = cast<foo>(B3);
- const foo *F9 = cast<foo>(B4);
- foo *F10 = cast<foo>(fub());
-
- // test cast_or_null
- const foo *F11 = cast_or_null<foo>(B2);
- const foo *F12 = cast_or_null<foo>(B2);
- const foo *F13 = cast_or_null<foo>(B4);
- const foo *F14 = cast_or_null<foo>(fub()); // Shouldn't print.
-
- // These lines are errors...
- //foo *F20 = cast<foo>(B2); // Yields const foo*
- //foo &F21 = cast<foo>(B3); // Yields const foo&
- //foo *F22 = cast<foo>(B4); // Yields const foo*
- //foo &F23 = cast_or_null<foo>(B1);
- //const foo &F24 = cast_or_null<foo>(B3);
-}
-
-bar *fub() { return 0; }
-void main() {
- bar B;
- test(B, &B);
-}
-
-#endif
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 61c3256d3845..9ae3d6af32ee 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -31,7 +31,7 @@
#include <vector>
namespace llvm {
-
+
/// cl Namespace - This namespace contains all of the command line option
/// processing machinery. It is intentionally a short name to make qualified
/// usage concise.
@@ -443,16 +443,23 @@ protected:
template <class DataType>
class parser : public generic_parser_base {
protected:
- SmallVector<std::pair<const char *,
- std::pair<DataType, const char *> >, 8> Values;
+ class OptionInfo {
+ public:
+ OptionInfo(const char *name, DataType v, const char *helpStr) :
+ Name(name), V(v), HelpStr(helpStr) {}
+ const char *Name;
+ DataType V;
+ const char *HelpStr;
+ };
+ SmallVector<OptionInfo, 8> Values;
public:
typedef DataType parser_data_type;
// Implement virtual functions needed by generic_parser_base
unsigned getNumOptions() const { return unsigned(Values.size()); }
- const char *getOption(unsigned N) const { return Values[N].first; }
+ const char *getOption(unsigned N) const { return Values[N].Name; }
const char *getDescription(unsigned N) const {
- return Values[N].second.second;
+ return Values[N].HelpStr;
}
// parse - Return true on error.
@@ -465,8 +472,8 @@ public:
for (unsigned i = 0, e = static_cast<unsigned>(Values.size());
i != e; ++i)
- if (Values[i].first == ArgVal) {
- V = Values[i].second.first;
+ if (Values[i].Name == ArgVal) {
+ V = Values[i].V;
return false;
}
@@ -478,8 +485,8 @@ public:
template <class DT>
void addLiteralOption(const char *Name, const DT &V, const char *HelpStr) {
assert(findOption(Name) == Values.size() && "Option already exists!");
- Values.push_back(std::make_pair(Name,
- std::make_pair(static_cast<DataType>(V),HelpStr)));
+ OptionInfo X(Name, static_cast<DataType>(V), HelpStr);
+ Values.push_back(X);
MarkOptionsChanged();
}
@@ -781,7 +788,7 @@ public:
DataType &getValue() { check(); return *Location; }
const DataType &getValue() const { check(); return *Location; }
-
+
operator DataType() const { return this->getValue(); }
};
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index b2ce76d285ef..14b36f80522d 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -24,7 +24,10 @@
/// into a shared library, then the class will be accessible from outside the
/// the library. Can also be used to mark variables and functions, making them
/// accessible from outside any shared library they are linked into.
-#if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#if defined(__MINGW32__) || defined(__CYGWIN__)
+#define LLVM_LIBRARY_VISIBILITY
+#define LLVM_GLOBAL_VISIBILITY __declspec(dllexport)
+#elif (__GNUC__ >= 4)
#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
#define LLVM_GLOBAL_VISIBILITY __attribute__ ((visibility("default")))
#else
diff --git a/include/llvm/Support/ConstantRange.h b/include/llvm/Support/ConstantRange.h
index 6342c6f1bdde..29086b2ac4f2 100644
--- a/include/llvm/Support/ConstantRange.h
+++ b/include/llvm/Support/ConstantRange.h
@@ -41,8 +41,6 @@ namespace llvm {
///
class ConstantRange {
APInt Lower, Upper;
- static ConstantRange intersect1Wrapped(const ConstantRange &LHS,
- const ConstantRange &RHS);
public:
/// Initialize a full (the default) or empty set for the specified bit width.
@@ -196,39 +194,45 @@ public:
ConstantRange sextOrTrunc(uint32_t BitWidth) const;
/// add - Return a new range representing the possible values resulting
- /// from an addition of a value in this range and a value in Other.
+ /// from an addition of a value in this range and a value in \p Other.
ConstantRange add(const ConstantRange &Other) const;
+ /// sub - Return a new range representing the possible values resulting
+ /// from a subtraction of a value in this range and a value in \p Other.
+ ConstantRange sub(const ConstantRange &Other) const;
+
/// multiply - Return a new range representing the possible values resulting
- /// from a multiplication of a value in this range and a value in Other.
+ /// from a multiplication of a value in this range and a value in \p Other.
/// TODO: This isn't fully implemented yet.
ConstantRange multiply(const ConstantRange &Other) const;
/// smax - Return a new range representing the possible values resulting
- /// from a signed maximum of a value in this range and a value in Other.
+ /// from a signed maximum of a value in this range and a value in \p Other.
ConstantRange smax(const ConstantRange &Other) const;
/// umax - Return a new range representing the possible values resulting
- /// from an unsigned maximum of a value in this range and a value in Other.
+ /// from an unsigned maximum of a value in this range and a value in \p Other.
ConstantRange umax(const ConstantRange &Other) const;
/// udiv - Return a new range representing the possible values resulting
- /// from an unsigned division of a value in this range and a value in Other.
- /// TODO: This isn't fully implemented yet.
+ /// from an unsigned division of a value in this range and a value in
+ /// \p Other.
ConstantRange udiv(const ConstantRange &Other) const;
/// shl - Return a new range representing the possible values resulting
- /// from a left shift of a value in this range by the Amount value.
- ConstantRange shl(const ConstantRange &Amount) const;
-
- /// ashr - Return a new range representing the possible values resulting from
- /// an arithmetic right shift of a value in this range by the Amount value.
- ConstantRange ashr(const ConstantRange &Amount) const;
+ /// from a left shift of a value in this range by a value in \p Other.
+ /// TODO: This isn't fully implemented yet.
+ ConstantRange shl(const ConstantRange &Other) const;
- /// shr - Return a new range representing the possible values resulting
- /// from a logical right shift of a value in this range by the Amount value.
- ConstantRange lshr(const ConstantRange &Amount) const;
+ /// lshr - Return a new range representing the possible values resulting
+ /// from a logical right shift of a value in this range and a value in
+ /// \p Other.
+ ConstantRange lshr(const ConstantRange &Other) const;
+ /// inverse - Return a new range that is the logical not of the current set.
+ ///
+ ConstantRange inverse() const;
+
/// print - Print out the bounds to a stream...
///
void print(raw_ostream &OS) const;
diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h
new file mode 100644
index 000000000000..d66609fddfec
--- /dev/null
+++ b/include/llvm/Support/CrashRecoveryContext.h
@@ -0,0 +1,84 @@
+//===--- CrashRecoveryContext.h - Crash Recovery ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H
+#define LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+
+/// \brief Crash recovery helper object.
+///
+/// This class implements support for running operations in a safe context so
+/// that crashes (memory errors, stack overflow, assertion violations) can be
+/// detected and control restored to the crashing thread. Crash detection is
+/// purely "best effort", the exact set of failures which can be recovered from
+/// is platform dependent.
+///
+/// Clients make use of this code by first calling
+/// CrashRecoveryContext::Enable(), and then executing unsafe operations via a
+/// CrashRecoveryContext object. For example:
+///
+/// void actual_work(void *);
+///
+/// void foo() {
+/// CrashRecoveryContext CRC;
+///
+/// if (!CRC.RunSafely(actual_work, 0)) {
+/// ... a crash was detected, report error to user ...
+/// }
+///
+/// ... no crash was detected ...
+/// }
+///
+/// Crash recovery contexts may not be nested.
+class CrashRecoveryContext {
+ void *Impl;
+
+public:
+ CrashRecoveryContext() : Impl(0) {}
+ ~CrashRecoveryContext();
+
+ /// \brief Enable crash recovery.
+ static void Enable();
+
+ /// \brief Disable crash recovery.
+ static void Disable();
+
+ /// \brief Return the active context, if the code is currently executing in a
+ /// thread which is in a protected context.
+ static CrashRecoveryContext *GetCurrent();
+
+ /// \brief Execute the provide callback function (with the given arguments) in
+ /// a protected context.
+ ///
+ /// \return True if the function completed successfully, and false if the
+ /// function crashed (or HandleCrash was called explicitly). Clients should
+ /// make as little assumptions as possible about the program state when
+ /// RunSafely has returned false. Clients can use getBacktrace() to retrieve
+ /// the backtrace of the crash on failures.
+ bool RunSafely(void (*Fn)(void*), void *UserData);
+
+ /// \brief Explicitly trigger a crash recovery in the current process, and
+ /// return failure from RunSafely(). This function does not return.
+ void HandleCrash();
+
+ /// \brief Return a string containing the backtrace where the crash was
+ /// detected; or empty if the backtrace wasn't recovered.
+ ///
+ /// This function is only valid when a crash has been detected (i.e.,
+ /// RunSafely() has returned false.
+ const std::string &getBacktrace() const;
+};
+
+}
+
+#endif
diff --git a/include/llvm/Support/DataFlow.h b/include/llvm/Support/DataFlow.h
index 8f79ead1c533..355c402f542d 100644
--- a/include/llvm/Support/DataFlow.h
+++ b/include/llvm/Support/DataFlow.h
@@ -25,7 +25,7 @@ namespace llvm {
template <> struct GraphTraits<const Value*> {
typedef const Value NodeType;
- typedef Value::use_const_iterator ChildIteratorType;
+ typedef Value::const_use_iterator ChildIteratorType;
static NodeType *getEntryNode(const Value *G) {
return G;
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 6f939e7e3435..83478b75cbc4 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -216,6 +216,27 @@ enum {
R_X86_64_TLSDESC = 36
};
+// i386 relocations.
+// TODO: this is just a subset
+enum {
+ R_386_NONE = 0,
+ R_386_32 = 1,
+ R_386_PC32 = 2,
+ R_386_GOT32 = 3,
+ R_386_PLT32 = 4,
+ R_386_COPY = 5,
+ R_386_GLOB_DAT = 6,
+ R_386_JUMP_SLOT = 7,
+ R_386_RELATIVE = 8,
+ R_386_GOTOFF = 9,
+ R_386_GOTPC = 10,
+ R_386_32PLT = 11,
+ R_386_16 = 20,
+ R_386_PC16 = 21,
+ R_386_8 = 22,
+ R_386_PC8 = 23
+};
+
// Section header.
struct Elf32_Shdr {
Elf32_Word sh_name; // Section name (index into string table)
@@ -257,22 +278,29 @@ enum {
// Section types.
enum {
- SHT_NULL = 0, // No associated section (inactive entry).
- SHT_PROGBITS = 1, // Program-defined contents.
- SHT_SYMTAB = 2, // Symbol table.
- SHT_STRTAB = 3, // String table.
- SHT_RELA = 4, // Relocation entries; explicit addends.
- SHT_HASH = 5, // Symbol hash table.
- SHT_DYNAMIC = 6, // Information for dynamic linking.
- SHT_NOTE = 7, // Information about the file.
- SHT_NOBITS = 8, // Data occupies no space in the file.
- SHT_REL = 9, // Relocation entries; no explicit addends.
- SHT_SHLIB = 10, // Reserved.
- SHT_DYNSYM = 11, // Symbol table.
- SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type.
- SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type.
- SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
- SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
+ SHT_NULL = 0, // No associated section (inactive entry).
+ SHT_PROGBITS = 1, // Program-defined contents.
+ SHT_SYMTAB = 2, // Symbol table.
+ SHT_STRTAB = 3, // String table.
+ SHT_RELA = 4, // Relocation entries; explicit addends.
+ SHT_HASH = 5, // Symbol hash table.
+ SHT_DYNAMIC = 6, // Information for dynamic linking.
+ SHT_NOTE = 7, // Information about the file.
+ SHT_NOBITS = 8, // Data occupies no space in the file.
+ SHT_REL = 9, // Relocation entries; no explicit addends.
+ SHT_SHLIB = 10, // Reserved.
+ SHT_DYNSYM = 11, // Symbol table.
+ SHT_INIT_ARRAY = 14, // Pointers to initialisation functions.
+ SHT_FINI_ARRAY = 15, // Pointers to termination functions.
+ SHT_PREINIT_ARRAY = 16, // Pointers to pre-init functions.
+ SHT_GROUP = 17, // Section group.
+ SHT_SYMTAB_SHNDX = 18, // Indicies for SHN_XINDEX entries.
+ SHT_LOOS = 0x60000000, // Lowest operating system-specific type.
+ SHT_HIOS = 0x6fffffff, // Highest operating system-specific type.
+ SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type.
+ SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type.
+ SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
+ SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
};
// Section flags.
@@ -323,6 +351,12 @@ struct Elf64_Sym {
}
};
+// The size (in bytes) of symbol table entries.
+enum {
+ SYMENTRY_SIZE32 = 16, // 32-bit symbol entry size
+ SYMENTRY_SIZE64 = 24 // 64-bit symbol entry size.
+};
+
// Symbol bindings.
enum {
STB_LOCAL = 0, // Local symbol, not visible outside obj file containing def
@@ -339,10 +373,19 @@ enum {
STT_FUNC = 2, // Symbol is executable code (function, etc.)
STT_SECTION = 3, // Symbol refers to a section
STT_FILE = 4, // Local, absolute symbol that refers to a file
+ STT_COMMON = 5, // An uninitialised common block
+ STT_TLS = 6, // Thread local data object
STT_LOPROC = 13, // Lowest processor-specific symbol type
STT_HIPROC = 15 // Highest processor-specific symbol type
};
+enum {
+ STV_DEFAULT = 0, // Visibility is specified by binding type
+ STV_INTERNAL = 1, // Defined by processor supplements
+ STV_HIDDEN = 2, // Not visible to other components
+ STV_PROTECTED = 3 // Visible in other components but not preemptable
+};
+
// Relocation entry, without explicit addend.
struct Elf32_Rel {
Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr)
@@ -356,7 +399,7 @@ struct Elf32_Rel {
void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf32_Word s, unsigned char t) {
r_info = (s << 8) + t;
- };
+ }
};
// Relocation entry with explicit addend.
@@ -373,7 +416,7 @@ struct Elf32_Rela {
void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf32_Word s, unsigned char t) {
r_info = (s << 8) + t;
- };
+ }
};
// Relocation entry, without explicit addend.
@@ -391,7 +434,7 @@ struct Elf64_Rel {
void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf64_Xword s, unsigned char t) {
r_info = (s << 32) + (t&0xffffffffL);
- };
+ }
};
// Relocation entry with explicit addend.
@@ -410,7 +453,7 @@ struct Elf64_Rela {
void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf64_Xword s, unsigned char t) {
r_info = (s << 32) + (t&0xffffffffL);
- };
+ }
};
// Program header for ELF32.
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index ffcb482f75c5..9854657c756f 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -52,6 +52,18 @@ namespace llvm {
/// llvm_stop_multithreaded().
void remove_fatal_error_handler();
+ /// ScopedFatalErrorHandler - This is a simple helper class which just
+ /// calls install_fatal_error_handler in its constructor and
+ /// remove_fatal_error_handler in its destructor.
+ struct ScopedFatalErrorHandler {
+ explicit ScopedFatalErrorHandler(fatal_error_handler_t handler,
+ void *user_data = 0) {
+ install_fatal_error_handler(handler, user_data);
+ }
+
+ ~ScopedFatalErrorHandler() { remove_fatal_error_handler(); }
+ };
+
/// Reports a serious error, calling any installed error handler. These
/// functions are intended to be used for error conditions which are outside
/// the control of the compiler (I/O errors, invalid user input, etc.)
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index 559f0040c2d9..287c5ba01eeb 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -89,7 +89,7 @@ class GraphWriter {
public:
GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) {
- DTraits = DOTTraits(SN);
+ DTraits = DOTTraits(SN);
}
void writeHeader(const std::string &Name) {
@@ -271,6 +271,12 @@ public:
O << "[" << Attrs << "]";
O << ";\n";
}
+
+ /// getOStream - Get the raw output stream into the graph file. Useful to
+ /// write fancy things using addCustomGraphFeatures().
+ raw_ostream &getOStream() {
+ return O;
+ }
};
template<typename GraphType>
@@ -316,7 +322,7 @@ sys::Path WriteGraph(const GraphType &G, const std::string &Name,
raw_fd_ostream O(Filename.c_str(), ErrorInfo);
if (ErrorInfo.empty()) {
- WriteGraph(O, G, ShortNames, Name, Title);
+ llvm::WriteGraph(O, G, ShortNames, Name, Title);
errs() << " done. \n";
} else {
errs() << "error opening file '" << Filename.str() << "' for writing!\n";
@@ -333,7 +339,7 @@ template<typename GraphType>
void ViewGraph(const GraphType &G, const std::string &Name,
bool ShortNames = false, const std::string &Title = "",
GraphProgram::Name Program = GraphProgram::DOT) {
- sys::Path Filename = WriteGraph(G, Name, ShortNames, Title);
+ sys::Path Filename = llvm::WriteGraph(G, Name, ShortNames, Title);
if (Filename.isEmpty())
return;
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 4b1b1c0411af..c827ccedd6f1 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -165,41 +165,21 @@ public:
}
/// getInt8 - Get a constant 8-bit value.
- ConstantInt *getInt8(int8_t C) {
- return ConstantInt::getSigned(getInt8Ty(), C);
- }
-
- /// getInt8 - Get a constant 8-bit value.
ConstantInt *getInt8(uint8_t C) {
return ConstantInt::get(getInt8Ty(), C);
}
/// getInt16 - Get a constant 16-bit value.
- ConstantInt *getInt16(int16_t C) {
- return ConstantInt::getSigned(getInt16Ty(), C);
- }
-
- /// getInt16 - Get a constant 16-bit value.
ConstantInt *getInt16(uint16_t C) {
return ConstantInt::get(getInt16Ty(), C);
}
/// getInt32 - Get a constant 32-bit value.
- ConstantInt *getInt32(int32_t C) {
- return ConstantInt::getSigned(getInt32Ty(), C);
- }
-
- /// getInt32 - Get a constant 32-bit value.
ConstantInt *getInt32(uint32_t C) {
return ConstantInt::get(getInt32Ty(), C);
}
/// getInt64 - Get a constant 64-bit value.
- ConstantInt *getInt64(int64_t C) {
- return ConstantInt::getSigned(getInt64Ty(), C);
- }
-
- /// getInt64 - Get a constant 64-bit value.
ConstantInt *getInt64(uint64_t C) {
return ConstantInt::get(getInt64Ty(), C);
}
diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h
index fe47c057558f..a44da528acfc 100644
--- a/include/llvm/Support/IRReader.h
+++ b/include/llvm/Support/IRReader.h
@@ -60,8 +60,7 @@ namespace llvm {
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
if (F == 0) {
Err = SMDiagnostic(Filename,
- "Could not open input file "
- "'" + Filename + "': " + ErrMsg);
+ "Could not open input file: " + ErrMsg);
return 0;
}
@@ -79,10 +78,10 @@ namespace llvm {
(const unsigned char *)Buffer->getBufferEnd())) {
std::string ErrMsg;
Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
- // ParseBitcodeFile does not take ownership of the Buffer.
- delete Buffer;
if (M == 0)
Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg);
+ // ParseBitcodeFile does not take ownership of the Buffer.
+ delete Buffer;
return M;
}
@@ -99,8 +98,7 @@ namespace llvm {
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg);
if (F == 0) {
Err = SMDiagnostic(Filename,
- "Could not open input file "
- "'" + Filename + "': " + ErrMsg);
+ "Could not open input file: " + ErrMsg);
return 0;
}
diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h
index e6fccfc17fca..4c13177926d2 100644
--- a/include/llvm/Support/MachO.h
+++ b/include/llvm/Support/MachO.h
@@ -14,11 +14,649 @@
#ifndef LLVM_SUPPORT_MACHO_H
#define LLVM_SUPPORT_MACHO_H
+#include "llvm/System/DataTypes.h"
+
// NOTE: The enums in this file are intentially named to be different than those
// in the headers in /usr/include/mach (on darwin systems) to avoid conflicts
// with those macros.
namespace llvm {
namespace MachO {
+ // Enums from <mach-o/loader.h>
+ enum {
+ // Constants for the "magic" field in llvm::MachO::mach_header and
+ // llvm::MachO::mach_header_64
+ HeaderMagic32 = 0xFEEDFACEu, // MH_MAGIC
+ HeaderMagic32Swapped = 0xCEFAEDFEu, // MH_CIGAM
+ HeaderMagic64 = 0xFEEDFACFu, // MH_MAGIC_64
+ HeaderMagic64Swapped = 0xCFFAEDFEu, // MH_CIGAM_64
+ UniversalMagic = 0xCAFEBABEu, // FAT_MAGIC
+ UniversalMagicSwapped = 0xBEBAFECAu, // FAT_CIGAM
+
+ // Constants for the "filetype" field in llvm::MachO::mach_header and
+ // llvm::MachO::mach_header_64
+ HeaderFileTypeObject = 0x1u, // MH_OBJECT
+ HeaderFileTypeExecutable = 0x2u, // MH_EXECUTE
+ HeaderFileTypeFixedVMShlib = 0x3u, // MH_FVMLIB
+ HeaderFileTypeCore = 0x4u, // MH_CORE
+ HeaderFileTypePreloadedExecutable = 0x5u, // MH_PRELOAD
+ HeaderFileTypeDynamicShlib = 0x6u, // MH_DYLIB
+ HeaderFileTypeDynamicLinkEditor = 0x7u, // MH_DYLINKER
+ HeaderFileTypeBundle = 0x8u, // MH_BUNDLE
+ HeaderFileTypeDynamicShlibStub = 0x9u, // MH_DYLIB_STUB
+ HeaderFileTypeDSYM = 0xAu, // MH_DSYM
+ HeaderFileTypeKextBundle = 0xBu, // MH_KEXT_BUNDLE
+
+ // Constant bits for the "flags" field in llvm::MachO::mach_header and
+ // llvm::MachO::mach_header_64
+ HeaderFlagBitNoUndefinedSymbols = 0x00000001u, // MH_NOUNDEFS
+ HeaderFlagBitIsIncrementalLinkObject= 0x00000002u, // MH_INCRLINK
+ HeaderFlagBitIsDynamicLinkObject = 0x00000004u, // MH_DYLDLINK
+ HeaderFlagBitBindAtLoad = 0x00000008u, // MH_BINDATLOAD
+ HeaderFlagBitPrebound = 0x00000010u, // MH_PREBOUND
+ HeaderFlagBitSplitSegments = 0x00000020u, // MH_SPLIT_SEGS
+ HeaderFlagBitLazyInit = 0x00000040u, // MH_LAZY_INIT
+ HeaderFlagBitTwoLevelNamespace = 0x00000080u, // MH_TWOLEVEL
+ HeaderFlagBitForceFlatNamespace = 0x00000100u, // MH_FORCE_FLAT
+ HeaderFlagBitNoMultipleDefintions = 0x00000200u, // MH_NOMULTIDEFS
+ HeaderFlagBitNoFixPrebinding = 0x00000400u, // MH_NOFIXPREBINDING
+ HeaderFlagBitPrebindable = 0x00000800u, // MH_PREBINDABLE
+ HeaderFlagBitAllModulesBound = 0x00001000u, // MH_ALLMODSBOUND
+ HeaderFlagBitSubsectionsViaSymbols = 0x00002000u, // MH_SUBSECTIONS_VIA_SYMBOLS
+ HeaderFlagBitCanonical = 0x00004000u, // MH_CANONICAL
+ HeaderFlagBitWeakDefines = 0x00008000u, // MH_WEAK_DEFINES
+ HeaderFlagBitBindsToWeak = 0x00010000u, // MH_BINDS_TO_WEAK
+ HeaderFlagBitAllowStackExecution = 0x00020000u, // MH_ALLOW_STACK_EXECUTION
+ HeaderFlagBitRootSafe = 0x00040000u, // MH_ROOT_SAFE
+ HeaderFlagBitSetUIDSafe = 0x00080000u, // MH_SETUID_SAFE
+ HeaderFlagBitNoReexportedDylibs = 0x00100000u, // MH_NO_REEXPORTED_DYLIBS
+ HeaderFlagBitPIE = 0x00200000u, // MH_PIE
+ HeaderFlagBitDeadStrippableDylib = 0x00400000u, // MH_DEAD_STRIPPABLE_DYLIB
+
+ // Constants for the "cmd" field in llvm::MachO::load_command
+ LoadCommandDynamicLinkerRequired = 0x80000000u, // LC_REQ_DYLD
+ LoadCommandSegment32 = 0x00000001u, // LC_SEGMENT
+ LoadCommandSymtab = 0x00000002u, // LC_SYMTAB
+ LoadCommandSymSeg = 0x00000003u, // LC_SYMSEG
+ LoadCommandThread = 0x00000004u, // LC_THREAD
+ LoadCommandUnixThread = 0x00000005u, // LC_UNIXTHREAD
+ LoadCommandFixedVMShlibLoad = 0x00000006u, // LC_LOADFVMLIB
+ LoadCommandFixedVMShlibIdent = 0x00000007u, // LC_IDFVMLIB
+ LoadCommandIdent = 0x00000008u, // LC_IDENT
+ LoadCommandFixedVMFileInclusion = 0x00000009u, // LC_FVMFILE
+ LoadCommandPrePage = 0x0000000Au, // LC_PREPAGE
+ LoadCommandDynamicSymtabInfo = 0x0000000Bu, // LC_DYSYMTAB
+ LoadCommandDylibLoad = 0x0000000Cu, // LC_LOAD_DYLIB
+ LoadCommandDylibIdent = 0x0000000Du, // LC_ID_DYLIB
+ LoadCommandDynamicLinkerLoad = 0x0000000Eu, // LC_LOAD_DYLINKER
+ LoadCommandDynamicLinkerIdent = 0x0000000Fu, // LC_ID_DYLINKER
+ LoadCommandDylibPrebound = 0x00000010u, // LC_PREBOUND_DYLIB
+ LoadCommandRoutines32 = 0x00000011u, // LC_ROUTINES
+ LoadCommandSubFramework = 0x00000012u, // LC_SUB_FRAMEWORK
+ LoadCommandSubUmbrella = 0x00000013u, // LC_SUB_UMBRELLA
+ LoadCommandSubClient = 0x00000014u, // LC_SUB_CLIENT
+ LoadCommandSubLibrary = 0x00000015u, // LC_SUB_LIBRARY
+ LoadCommandTwoLevelHints = 0x00000016u, // LC_TWOLEVEL_HINTS
+ LoadCommandPreBindChecksum = 0x00000017u, // LC_PREBIND_CKSUM
+ LoadCommandDylibLoadWeak = 0x80000018u, // LC_LOAD_WEAK_DYLIB
+ LoadCommandSegment64 = 0x00000019u, // LC_SEGMENT_64
+ LoadCommandRoutines64 = 0x0000001Au, // LC_ROUTINES_64
+ LoadCommandUUID = 0x0000001Bu, // LC_UUID
+ LoadCommandRunpath = 0x8000001Cu, // LC_RPATH
+ LoadCommandCodeSignature = 0x0000001Du, // LC_CODE_SIGNATURE
+ LoadCommandSegmentSplitInfo = 0x0000001Eu, // LC_SEGMENT_SPLIT_INFO
+ LoadCommandDylibReexport = 0x8000001Fu, // LC_REEXPORT_DYLIB
+ LoadCommandDylibLazyLoad = 0x00000020u, // LC_LAZY_LOAD_DYLIB
+ LoadCommandEncryptionInfo = 0x00000021u, // LC_ENCRYPTION_INFO
+ LoadCommandDynamicLinkerInfo = 0x00000022u, // LC_DYLD_INFO
+ LoadCommandDynamicLinkerInfoOnly = 0x80000022u, // LC_DYLD_INFO_ONLY
+ LoadCommandDylibLoadUpward = 0x80000023u, // LC_LOAD_UPWARD_DYLIB
+
+ // Constant bits for the "flags" field in llvm::MachO::segment_command
+ SegmentCommandFlagBitHighVM = 0x1u, // SG_HIGHVM
+ SegmentCommandFlagBitFixedVMLibrary = 0x2u, // SG_FVMLIB
+ SegmentCommandFlagBitNoRelocations = 0x4u, // SG_NORELOC
+ SegmentCommandFlagBitProtectedVersion1 = 0x8u, // SG_PROTECTED_VERSION_1
+
+
+ // Constant masks for the "flags" field in llvm::MachO::section and
+ // llvm::MachO::section_64
+ SectionFlagMaskSectionType = 0x000000ffu, // SECTION_TYPE
+ SectionFlagMaskAllAttributes = 0xffffff00u, // SECTION_ATTRIBUTES
+ SectionFlagMaskUserAttributes = 0xff000000u, // SECTION_ATTRIBUTES_USR
+ SectionFlagMaskSystemAttributes = 0x00ffff00u, // SECTION_ATTRIBUTES_SYS
+
+ // Constant masks for the "flags[7:0]" field in llvm::MachO::section and
+ // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE)
+ SectionTypeRegular = 0x00u, // S_REGULAR
+ SectionTypeZeroFill = 0x01u, // S_ZEROFILL
+ SectionTypeCStringLiterals = 0x02u, // S_CSTRING_LITERALS
+ SectionType4ByteLiterals = 0x03u, // S_4BYTE_LITERALS
+ SectionType8ByteLiterals = 0x04u, // S_8BYTE_LITERALS
+ SectionTypeLiteralPointers = 0x05u, // S_LITERAL_POINTERS
+ SectionTypeNonLazySymbolPointers = 0x06u, // S_NON_LAZY_SYMBOL_POINTERS
+ SectionTypeLazySymbolPointers = 0x07u, // S_LAZY_SYMBOL_POINTERS
+ SectionTypeSymbolStubs = 0x08u, // S_SYMBOL_STUBS
+ SectionTypeModuleInitFunctionPointers = 0x09u, // S_MOD_INIT_FUNC_POINTERS
+ SectionTypeModuleTermFunctionPointers = 0x0au, // S_MOD_TERM_FUNC_POINTERS
+ SectionTypeCoalesced = 0x0bu, // S_COALESCED
+ SectionTypeZeroFillLarge = 0x0cu, // S_GB_ZEROFILL
+ SectionTypeInterposing = 0x0du, // S_INTERPOSING
+ SectionType16ByteLiterals = 0x0eu, // S_16BYTE_LITERALS
+ SectionTypeDTraceObjectFormat = 0x0fu, // S_DTRACE_DOF
+ SectionTypeLazyDylibSymbolPointers = 0x10u, // S_LAZY_DYLIB_SYMBOL_POINTERS
+
+ // Constant masks for the "flags[31:24]" field in llvm::MachO::section and
+ // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR)
+ SectionAttrUserPureInstructions = 0x80000000u, // S_ATTR_PURE_INSTRUCTIONS
+ SectionAttrUserNoTableOfContents = 0x40000000u, // S_ATTR_NO_TOC
+ SectionAttrUserCanStripStaticSymbols = 0x20000000u, // S_ATTR_STRIP_STATIC_SYMS
+ SectionAttrUserNoDeadStrip = 0x10000000u, // S_ATTR_NO_DEAD_STRIP
+ SectionAttrUserLiveSupport = 0x08000000u, // S_ATTR_LIVE_SUPPORT
+ SectionAttrUserSelfModifyingCode = 0x04000000u, // S_ATTR_SELF_MODIFYING_CODE
+ SectionAttrUserDebug = 0x02000000u, // S_ATTR_DEBUG
+
+ // Constant masks for the "flags[23:8]" field in llvm::MachO::section and
+ // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS)
+ SectionAttrSytemSomeInstructions = 0x00000400u, // S_ATTR_SOME_INSTRUCTIONS
+ SectionAttrSytemHasExternalRelocations= 0x00000200u, // S_ATTR_EXT_RELOC
+ SectionAttrSytemHasLocalRelocations = 0x00000100u, // S_ATTR_LOC_RELOC
+
+ IndirectSymbolLocal = 0x80000000u, // INDIRECT_SYMBOL_LOCAL
+ IndirectSymbolAbsolute = 0x40000000u, // INDIRECT_SYMBOL_ABS
+
+ RebaseTypePointer = 1u, // REBASE_TYPE_POINTER
+ RebaseTypeTextAbsolute32 = 2u, // REBASE_TYPE_TEXT_ABSOLUTE32
+ RebaseTypeTextPCRelative32 = 3u, // REBASE_TYPE_TEXT_PCREL32
+
+ RebaseOpcodeMask = 0xF0u, // REBASE_OPCODE_MASK
+ RebaseImmediateMask = 0x0Fu, // REBASE_IMMEDIATE_MASK
+ RebaseOpcodeDone = 0x00u, // REBASE_OPCODE_DONE
+ RebaseOpcodeSetTypeImmediate = 0x10u, // REBASE_OPCODE_SET_TYPE_IMM
+ RebaseOpcodeSetSegmentAndOffsetULEB = 0x20u, // REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+ RebaseOpcodeAddAddressULEB = 0x30u, // REBASE_OPCODE_ADD_ADDR_ULEB
+ RebaseOpcodeAddAddressImmediateScaled = 0x40u, // REBASE_OPCODE_ADD_ADDR_IMM_SCALED
+ RebaseOpcodeDoRebaseImmediateTimes = 0x50u, // REBASE_OPCODE_DO_REBASE_IMM_TIMES
+ RebaseOpcodeDoRebaseULEBTimes = 0x60u, // REBASE_OPCODE_DO_REBASE_ULEB_TIMES
+ RebaseOpcodeDoRebaseAddAddressULEB = 0x70u, // REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
+ RebaseOpcodeDoRebaseULEBTimesSkippingULEB = 0x80u, // REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
+
+
+ BindTypePointer = 1u, // BIND_TYPE_POINTER
+ BindTypeTextAbsolute32 = 2u, // BIND_TYPE_TEXT_ABSOLUTE32
+ BindTypeTextPCRelative32 = 3u, // BIND_TYPE_TEXT_PCREL32
+
+ BindSpecialDylibSelf = 0u, // BIND_SPECIAL_DYLIB_SELF
+ BindSpecialDylibMainExecutable = -1u, // BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE
+ BindSpecialDylibFlatLookup = -2u, // BIND_SPECIAL_DYLIB_FLAT_LOOKUP
+
+ BindSymbolFlagsWeakImport = 0x1u, // BIND_SYMBOL_FLAGS_WEAK_IMPORT
+ BindSymbolFlagsNonWeakDefinition = 0x8u, // BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION
+
+ BindOpcodeMask = 0xF0u, // BIND_OPCODE_MASK
+ BindImmediateMask = 0x0Fu, // BIND_IMMEDIATE_MASK
+ BindOpcodeDone = 0x00u, // BIND_OPCODE_DONE
+ BindOpcodeSetDylibOrdinalImmediate = 0x10u, // BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
+ BindOpcodeSetDylibOrdinalULEB = 0x20u, // BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
+ BindOpcodeSetDylibSpecialImmediate = 0x30u, // BIND_OPCODE_SET_DYLIB_SPECIAL_IMM
+ BindOpcodeSetSymbolTrailingFlagsImmediate = 0x40u, // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
+ BindOpcodeSetTypeImmediate = 0x50u, // BIND_OPCODE_SET_TYPE_IMM
+ BindOpcodeSetAppendSLEB = 0x60u, // BIND_OPCODE_SET_ADDEND_SLEB
+ BindOpcodeSetSegmentAndOffsetULEB = 0x70u, // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+ BindOpcodeAddAddressULEB = 0x80u, // BIND_OPCODE_ADD_ADDR_ULEB
+ BindOpcodeDoBind = 0x90u, // BIND_OPCODE_DO_BIND
+ BindOpcodeDoBindAddAddressULEB = 0xA0u, // BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
+ BindOpcodeDoBindAddAddressImmediateScaled = 0xB0u, // BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
+ BindOpcodeDoBindULEBTimesSkippingULEB = 0xC0u, // BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
+
+ ExportSymbolFlagsKindMask = 0x03u, // EXPORT_SYMBOL_FLAGS_KIND_MASK
+ ExportSymbolFlagsKindRegular = 0x00u, // EXPORT_SYMBOL_FLAGS_KIND_REGULAR
+ ExportSymbolFlagsKindThreadLocal = 0x01u, // EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
+ ExportSymbolFlagsWeakDefinition = 0x04u, // EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION
+ ExportSymbolFlagsIndirectDefinition = 0x08u, // EXPORT_SYMBOL_FLAGS_INDIRECT_DEFINITION
+ ExportSymbolFlagsHasSpecializations = 0x10u, // EXPORT_SYMBOL_FLAGS_HAS_SPECIALIZATIONS
+
+
+ // Constant masks for the "n_type" field in llvm::MachO::nlist and
+ // llvm::MachO::nlist_64
+ NlistMaskStab = 0xe0, // N_STAB
+ NlistMaskPrivateExternal = 0x10, // N_PEXT
+ NlistMaskType = 0x0e, // N_TYPE
+ NlistMaskExternal = 0x01, // N_EXT
+
+ // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and
+ // llvm::MachO::nlist_64
+ NListTypeUndefined = 0x0u, // N_UNDF
+ NListTypeAbsolute = 0x2u, // N_ABS
+ NListTypeSection = 0xeu, // N_SECT
+ NListTypePreboundUndefined = 0xcu, // N_PBUD
+ NListTypeIndirect = 0xau, // N_INDR
+
+ // Constant masks for the "n_sect" field in llvm::MachO::nlist and
+ // llvm::MachO::nlist_64
+ NListSectionNoSection = 0u, // NO_SECT
+ NListSectionMaxSection = 0xffu, // MAX_SECT
+
+ // Constant values for the "n_type" field in llvm::MachO::nlist and
+ // llvm::MachO::nlist_64 when "(n_type & NlistMaskStab) != 0"
+ StabGlobalSymbol = 0x20u, // N_GSYM
+ StabFunctionName = 0x22u, // N_FNAME
+ StabFunction = 0x24u, // N_FUN
+ StabStaticSymbol = 0x26u, // N_STSYM
+ StabLocalCommon = 0x28u, // N_LCSYM
+ StabBeginSymbol = 0x2Eu, // N_BNSYM
+ StabSourceFileOptions = 0x3Cu, // N_OPT
+ StabRegisterSymbol = 0x40u, // N_RSYM
+ StabSourceLine = 0x44u, // N_SLINE
+ StabEndSymbol = 0x4Eu, // N_ENSYM
+ StabStructureType = 0x60u, // N_SSYM
+ StabSourceFileName = 0x64u, // N_SO
+ StabObjectFileName = 0x66u, // N_OSO
+ StabLocalSymbol = 0x80u, // N_LSYM
+ StabBeginIncludeFileName = 0x82u, // N_BINCL
+ StabIncludeFileName = 0x84u, // N_SOL
+ StabCompilerParameters = 0x86u, // N_PARAMS
+ StabCompilerVersion = 0x88u, // N_VERSION
+ StabCompilerOptLevel = 0x8Au, // N_OLEVEL
+ StabParameter = 0xA0u, // N_PSYM
+ StabEndIncludeFile = 0xA2u, // N_EINCL
+ StabAlternateEntry = 0xA4u, // N_ENTRY
+ StabLeftBracket = 0xC0u, // N_LBRAC
+ StabDeletedIncludeFile = 0xC2u, // N_EXCL
+ StabRightBracket = 0xE0u, // N_RBRAC
+ StabBeginCommon = 0xE2u, // N_BCOMM
+ StabEndCommon = 0xE4u, // N_ECOMM
+ StabEndCommonLocal = 0xE8u, // N_ECOML
+ StabLength = 0xFEu // N_LENG
+
+ };
+
+ // Structs from <mach-o/loader.h>
+
+ struct mach_header {
+ uint32_t magic;
+ uint32_t cputype;
+ uint32_t cpusubtype;
+ uint32_t filetype;
+ uint32_t ncmds;
+ uint32_t sizeofcmds;
+ uint32_t flags;
+ };
+
+ struct mach_header_64 {
+ uint32_t magic;
+ uint32_t cputype;
+ uint32_t cpusubtype;
+ uint32_t filetype;
+ uint32_t ncmds;
+ uint32_t sizeofcmds;
+ uint32_t flags;
+ uint32_t reserved;
+ };
+
+ struct load_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ };
+
+ struct segment_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ char segname[16];
+ uint32_t vmaddr;
+ uint32_t vmsize;
+ uint32_t fileoff;
+ uint32_t filesize;
+ uint32_t maxprot;
+ uint32_t initprot;
+ uint32_t nsects;
+ uint32_t flags;
+ };
+
+ struct segment_command_64 {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ char segname[16];
+ uint64_t vmaddr;
+ uint64_t vmsize;
+ uint64_t fileoff;
+ uint64_t filesize;
+ uint32_t maxprot;
+ uint32_t initprot;
+ uint32_t nsects;
+ uint32_t flags;
+ };
+
+ struct section {
+ char sectname[16];
+ char segname[16];
+ uint32_t addr;
+ uint32_t size;
+ uint32_t offset;
+ uint32_t align;
+ uint32_t reloff;
+ uint32_t nreloc;
+ uint32_t flags;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ };
+
+ struct section_64 {
+ char sectname[16];
+ char segname[16];
+ uint64_t addr;
+ uint64_t size;
+ uint32_t offset;
+ uint32_t align;
+ uint32_t reloff;
+ uint32_t nreloc;
+ uint32_t flags;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ uint32_t reserved3;
+ };
+
+ struct fvmlib {
+ uint32_t name;
+ uint32_t minor_version;
+ uint32_t header_addr;
+ };
+
+ struct fvmlib_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ struct fvmlib fvmlib;
+ };
+
+ struct dylib {
+ uint32_t name;
+ uint32_t timestamp;
+ uint32_t current_version;
+ uint32_t compatibility_version;
+ };
+
+ struct dylib_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ struct dylib dylib;
+ };
+
+ struct sub_framework_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t umbrella;
+ };
+
+ struct sub_client_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t client;
+ };
+
+ struct sub_umbrella_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t sub_umbrella;
+ };
+
+ struct sub_library_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t sub_library;
+ };
+
+ struct prebound_dylib_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t name;
+ uint32_t nmodules;
+ uint32_t linked_modules;
+ };
+
+ struct dylinker_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t name;
+ };
+
+ struct thread_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ };
+
+ struct routines_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t init_address;
+ uint32_t init_module;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ uint32_t reserved3;
+ uint32_t reserved4;
+ uint32_t reserved5;
+ uint32_t reserved6;
+ };
+
+ struct routines_command_64 {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint64_t init_address;
+ uint64_t init_module;
+ uint64_t reserved1;
+ uint64_t reserved2;
+ uint64_t reserved3;
+ uint64_t reserved4;
+ uint64_t reserved5;
+ uint64_t reserved6;
+ };
+
+ struct symtab_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t symoff;
+ uint32_t nsyms;
+ uint32_t stroff;
+ uint32_t strsize;
+ };
+
+ struct dysymtab_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t ilocalsym;
+ uint32_t nlocalsym;
+ uint32_t iextdefsym;
+ uint32_t nextdefsym;
+ uint32_t iundefsym;
+ uint32_t nundefsym;
+ uint32_t tocoff;
+ uint32_t ntoc;
+ uint32_t modtaboff;
+ uint32_t nmodtab;
+ uint32_t extrefsymoff;
+ uint32_t nextrefsyms;
+ uint32_t indirectsymoff;
+ uint32_t nindirectsyms;
+ uint32_t extreloff;
+ uint32_t nextrel;
+ uint32_t locreloff;
+ uint32_t nlocrel;
+ };
+
+ struct dylib_table_of_contents {
+ uint32_t symbol_index;
+ uint32_t module_index;
+ };
+
+ struct dylib_module {
+ uint32_t module_name;
+ uint32_t iextdefsym;
+ uint32_t nextdefsym;
+ uint32_t irefsym;
+ uint32_t nrefsym;
+ uint32_t ilocalsym;
+ uint32_t nlocalsym;
+ uint32_t iextrel;
+ uint32_t nextrel;
+ uint32_t iinit_iterm;
+ uint32_t ninit_nterm;
+ uint32_t objc_module_info_addr;
+ uint32_t objc_module_info_size;
+ };
+
+ struct dylib_module_64 {
+ uint32_t module_name;
+ uint32_t iextdefsym;
+ uint32_t nextdefsym;
+ uint32_t irefsym;
+ uint32_t nrefsym;
+ uint32_t ilocalsym;
+ uint32_t nlocalsym;
+ uint32_t iextrel;
+ uint32_t nextrel;
+ uint32_t iinit_iterm;
+ uint32_t ninit_nterm;
+ uint32_t objc_module_info_size;
+ uint64_t objc_module_info_addr;
+ };
+
+ struct dylib_reference {
+ uint32_t isym:24,
+ flags:8;
+ };
+
+
+ struct twolevel_hints_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t offset;
+ uint32_t nhints;
+ };
+
+ struct twolevel_hint {
+ uint32_t isub_image:8,
+ itoc:24;
+ };
+
+ struct prebind_cksum_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t cksum;
+ };
+
+ struct uuid_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint8_t uuid[16];
+ };
+
+ struct rpath_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t path;
+ };
+
+ struct linkedit_data_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t dataoff;
+ uint32_t datasize;
+ };
+
+ struct encryption_info_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t cryptoff;
+ uint32_t cryptsize;
+ uint32_t cryptid;
+ };
+
+ struct dyld_info_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t rebase_off;
+ uint32_t rebase_size;
+ uint32_t bind_off;
+ uint32_t bind_size;
+ uint32_t weak_bind_off;
+ uint32_t weak_bind_size;
+ uint32_t lazy_bind_off;
+ uint32_t lazy_bind_size;
+ uint32_t export_off;
+ uint32_t export_size;
+ };
+
+ struct symseg_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t offset;
+ uint32_t size;
+ };
+
+ struct ident_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ };
+
+ struct fvmfile_command {
+ uint32_t cmd;
+ uint32_t cmdsize;
+ uint32_t name;
+ uint32_t header_addr;
+ };
+
+
+ // Structs from <mach-o/fat.h>
+ struct fat_header {
+ uint32_t magic;
+ uint32_t nfat_arch;
+ };
+
+ struct fat_arch {
+ uint32_t cputype;
+ uint32_t cpusubtype;
+ uint32_t offset;
+ uint32_t size;
+ uint32_t align;
+ };
+
+ // Structs from <mach-o/fat.h>
+ struct nlist {
+ uint32_t n_strx;
+ uint8_t n_type;
+ uint8_t n_sect;
+ int16_t n_desc;
+ uint32_t n_value;
+ };
+
+ struct nlist_64 {
+ uint32_t n_strx;
+ uint8_t n_type;
+ uint8_t n_sect;
+ uint16_t n_desc;
+ uint64_t n_value;
+ };
+
+ // Get/Set functions from <mach-o/nlist.h>
+
+ static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc)
+ {
+ return (((n_desc) >> 8u) & 0xffu);
+ }
+
+ static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal)
+ {
+ n_desc = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8));
+ }
+
+ static inline uint8_t GET_COMM_ALIGN (uint16_t n_desc)
+ {
+ return (n_desc >> 8u) & 0x0fu;
+ }
+
+ static inline void SET_COMM_ALIGN (uint16_t &n_desc, uint8_t align)
+ {
+ n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u));
+ }
+
// Enums from <mach/machine.h>
enum {
// Capability bits used in the definition of cpu_type.
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 80d11ae062f7..982813f7186f 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -126,7 +126,8 @@ inline uint16_t ByteSwap_16(uint16_t Value) {
/// ByteSwap_32 - This function returns a byte-swapped representation of the
/// 32-bit argument, Value.
inline uint32_t ByteSwap_32(uint32_t Value) {
-#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
+#if defined(__llvm__) || \
+ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
return __builtin_bswap32(Value);
#elif defined(_MSC_VER) && !defined(_DEBUG)
return _byteswap_ulong(Value);
@@ -142,7 +143,8 @@ inline uint32_t ByteSwap_32(uint32_t Value) {
/// ByteSwap_64 - This function returns a byte-swapped representation of the
/// 64-bit argument, Value.
inline uint64_t ByteSwap_64(uint64_t Value) {
-#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
+#if defined(__llvm__) || \
+ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
return __builtin_bswap64(Value);
#elif defined(_MSC_VER) && !defined(_DEBUG)
return _byteswap_uint64(Value);
diff --git a/include/llvm/Support/PassNameParser.h b/include/llvm/Support/PassNameParser.h
index cdca978cfef6..a24a6f0c5e94 100644
--- a/include/llvm/Support/PassNameParser.h
+++ b/include/llvm/Support/PassNameParser.h
@@ -23,11 +23,11 @@
#ifndef LLVM_SUPPORT_PASS_NAME_PARSER_H
#define LLVM_SUPPORT_PASS_NAME_PARSER_H
+#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Pass.h"
-#include <algorithm>
#include <cstring>
namespace llvm {
@@ -42,8 +42,7 @@ class PassNameParser : public PassRegistrationListener,
public:
PassNameParser() : Opt(0) {}
virtual ~PassNameParser();
-
-
+
void initialize(cl::Option &O) {
Opt = &O;
cl::parser<const PassInfo*>::initialize(O);
@@ -77,20 +76,21 @@ public:
}
virtual void passEnumerate(const PassInfo *P) { passRegistered(P); }
- // ValLessThan - Provide a sorting comparator for Values elements...
- typedef std::pair<const char*,
- std::pair<const PassInfo*, const char*> > ValType;
- static bool ValLessThan(const ValType &VT1, const ValType &VT2) {
- return std::string(VT1.first) < std::string(VT2.first);
- }
-
// printOptionInfo - Print out information about this option. Override the
// default implementation to sort the table before we print...
virtual void printOptionInfo(const cl::Option &O, size_t GlobalWidth) const {
PassNameParser *PNP = const_cast<PassNameParser*>(this);
- std::sort(PNP->Values.begin(), PNP->Values.end(), ValLessThan);
+ array_pod_sort(PNP->Values.begin(), PNP->Values.end(), ValLessThan);
cl::parser<const PassInfo*>::printOptionInfo(O, GlobalWidth);
}
+
+private:
+ // ValLessThan - Provide a sorting comparator for Values elements...
+ static int ValLessThan(const void *VT1, const void *VT2) {
+ typedef PassNameParser::OptionInfo ValType;
+ return std::strcmp(static_cast<const ValType *>(VT1)->Name,
+ static_cast<const ValType *>(VT2)->Name);
+ }
};
///===----------------------------------------------------------------------===//
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index f02bc347a17e..bee676863780 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -453,6 +453,13 @@ struct CastClass_match {
}
};
+/// m_BitCast
+template<typename OpTy>
+inline CastClass_match<OpTy, Instruction::BitCast>
+m_BitCast(const OpTy &Op) {
+ return CastClass_match<OpTy, Instruction::BitCast>(Op);
+}
+
/// m_PtrToInt
template<typename OpTy>
inline CastClass_match<OpTy, Instruction::PtrToInt>
diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h
index 0db84e1a14c6..6dbce393b97e 100644
--- a/include/llvm/Support/PrettyStackTrace.h
+++ b/include/llvm/Support/PrettyStackTrace.h
@@ -24,10 +24,10 @@ namespace llvm {
/// handlers which conflict with the ones installed by this module.
/// Defaults to false.
extern bool DisablePrettyStackTrace;
-
+
/// PrettyStackTraceEntry - This class is used to represent a frame of the
/// "pretty" stack trace that is dumped when a program crashes. You can define
- /// subclasses of this and declare them on the program stack: when they are
+ /// subclasses of this and declare them on the program stack: when they are
/// constructed and destructed, they will add their symbolic frames to a
/// virtual stack trace. This gets dumped out if the program crashes.
class PrettyStackTraceEntry {
@@ -37,14 +37,14 @@ namespace llvm {
public:
PrettyStackTraceEntry();
virtual ~PrettyStackTraceEntry();
-
+
/// print - Emit information about this stack frame to OS.
virtual void print(raw_ostream &OS) const = 0;
-
+
/// getNextEntry - Return the next entry in the list of frames.
const PrettyStackTraceEntry *getNextEntry() const { return NextEntry; }
};
-
+
/// PrettyStackTraceString - This object prints a specified string (which
/// should not contain newlines) to the stream as the stack trace when a crash
/// occurs.
@@ -54,7 +54,7 @@ namespace llvm {
PrettyStackTraceString(const char *str) : Str(str) {}
virtual void print(raw_ostream &OS) const;
};
-
+
/// PrettyStackTraceProgram - This object prints a specified program arguments
/// to the stream as the stack trace when a crash occurs.
class PrettyStackTraceProgram : public PrettyStackTraceEntry {
@@ -65,7 +65,7 @@ namespace llvm {
: ArgC(argc), ArgV(argv) {}
virtual void print(raw_ostream &OS) const;
};
-
+
} // end namespace llvm
#endif
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index ea65ccf3aa3a..b46a66889e96 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_REGEX_H
+#define LLVM_SUPPORT_REGEX_H
+
#include <string>
struct llvm_regex;
@@ -18,7 +21,7 @@ struct llvm_regex;
namespace llvm {
class StringRef;
template<typename T> class SmallVectorImpl;
-
+
class Regex {
public:
enum {
@@ -26,9 +29,9 @@ namespace llvm {
/// Compile for matching that ignores upper/lower case distinctions.
IgnoreCase=1,
/// Compile for newline-sensitive matching. With this flag '[^' bracket
- /// expressions and '.' never match newline. A ^ anchor matches the
- /// null string after any newline in the string in addition to its normal
- /// function, and the $ anchor matches the null string before any
+ /// expressions and '.' never match newline. A ^ anchor matches the
+ /// null string after any newline in the string in addition to its normal
+ /// function, and the $ anchor matches the null string before any
/// newline in the string in addition to its normal function.
Newline=2
};
@@ -47,7 +50,7 @@ namespace llvm {
/// matches it contains. The number filled in by match will include this
/// many entries plus one for the whole regex (as element 0).
unsigned getNumMatches() const;
-
+
/// matches - Match the regex against a given \arg String.
///
/// \param Matches - If given, on a succesful match this will be filled in
@@ -74,3 +77,5 @@ namespace llvm {
int error;
};
}
+
+#endif // LLVM_SUPPORT_REGEX_H
diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h
index 4db88825afb5..d0375bedd9f2 100644
--- a/include/llvm/Support/Registry.h
+++ b/include/llvm/Support/Registry.h
@@ -203,6 +203,8 @@ namespace llvm {
};
+ // Since these are defined in a header file, plugins must be sure to export
+ // these symbols.
template <typename T, typename U>
typename Registry<T,U>::node *Registry<T,U>::Head;
diff --git a/include/llvm/Support/SlowOperationInformer.h b/include/llvm/Support/SlowOperationInformer.h
deleted file mode 100644
index 607d993fff7b..000000000000
--- a/include/llvm/Support/SlowOperationInformer.h
+++ /dev/null
@@ -1,65 +0,0 @@
-//===- llvm/Support/SlowOperationInformer.h - Keep user informed *- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple object which can be used to let the user know what
-// is going on when a slow operation is happening, and gives them the ability to
-// cancel it. Potentially slow operations can stack allocate one of these
-// objects, and periodically call the "progress" method to update the progress
-// bar. If the operation takes more than 1 second to complete, the progress bar
-// is automatically shown and updated. As such, the slow operation should not
-// print stuff to the screen, and should not be confused if an extra line
-// appears on the screen (ie, the cursor should be at the start of the line).
-//
-// If the user presses CTRL-C during the operation, the next invocation of the
-// progress method return true indicating that the operation was cancelled.
-//
-// Because SlowOperationInformers fiddle around with signals, they cannot be
-// nested, and interact poorly with threads. The SIGALRM handler is set back to
-// SIGDFL, but the SIGINT signal handler is restored when the
-// SlowOperationInformer is destroyed.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_SLOW_OPERATION_INFORMER_H
-#define LLVM_SUPPORT_SLOW_OPERATION_INFORMER_H
-
-#include <string>
-#include <cassert>
-#include "llvm/System/DataTypes.h"
-
-namespace llvm {
- class SlowOperationInformer {
- std::string OperationName;
- unsigned LastPrintAmount;
-
- SlowOperationInformer(const SlowOperationInformer&); // DO NOT IMPLEMENT
- void operator=(const SlowOperationInformer&); // DO NOT IMPLEMENT
- public:
- explicit SlowOperationInformer(const std::string &Name);
- ~SlowOperationInformer();
-
- /// progress - Clients should periodically call this method when they can
- /// handle cancellation. The Amount variable should indicate how far
- /// along the operation is, given in 1/10ths of a percent (in other words,
- /// Amount should range from 0 to 1000). If the user cancels the operation,
- /// this returns true, false otherwise.
- bool progress(unsigned Amount);
-
- /// progress - Same as the method above, but this performs the division for
- /// you, and helps you avoid overflow if you are dealing with largish
- /// numbers.
- bool progress(unsigned Current, unsigned Maximum) {
- assert(Maximum != 0 &&
- "Shouldn't be doing work if there is nothing to do!");
- return progress(Current*uint64_t(1000UL)/Maximum);
- }
- };
-} // end namespace llvm
-
-#endif /* SLOW_OPERATION_INFORMER_H */
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index 9cd35d1f9311..270ab2b2f85c 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -161,8 +161,8 @@ public:
// Diagnostic with no location (e.g. file not found, command line arg error).
SMDiagnostic(const std::string &filename, const std::string &Msg,
bool showline = true)
- : SM(0), Loc(), Filename(filename), LineNo(-1), ColumnNo(-1),
- Message(Msg), LineContents(""), ShowLine(showline) {}
+ : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1),
+ Message(Msg), ShowLine(showline) {}
// Diagnostic with a location.
SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h
index b97ad29d2eb7..bb3bddd3c799 100644
--- a/include/llvm/Support/StandardPasses.h
+++ b/include/llvm/Support/StandardPasses.h
@@ -20,6 +20,7 @@
#define LLVM_SUPPORT_STANDARDPASSES_H
#include "llvm/PassManager.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Transforms/Scalar.h"
@@ -116,7 +117,6 @@ namespace llvm {
PM->add(createArgumentPromotionPass()); // Scalarize uninlined fn args
// Start of function pass.
-
PM->add(createScalarReplAggregatesPass()); // Break up aggregate allocas
if (SimplifyLibCalls)
PM->add(createSimplifyLibCallsPass()); // Library Call Optimizations
@@ -146,6 +146,7 @@ namespace llvm {
// opened up by them.
PM->add(createInstructionCombiningPass());
PM->add(createJumpThreadingPass()); // Thread jumps
+ PM->add(createCorrelatedValuePropagationPass());
PM->add(createDeadStoreEliminationPass()); // Delete dead stores
PM->add(createAggressiveDCEPass()); // Delete dead instructions
PM->add(createCFGSimplificationPass()); // Merge & remove BBs
diff --git a/include/llvm/Support/SystemUtils.h b/include/llvm/Support/SystemUtils.h
index b3d83fc24345..3c182c1ca8b0 100644
--- a/include/llvm/Support/SystemUtils.h
+++ b/include/llvm/Support/SystemUtils.h
@@ -21,10 +21,9 @@ namespace llvm {
class raw_ostream;
namespace sys { class Path; }
-/// Determine if the raw_ostream provided is connected to the outs() and
-/// displayed or not (to a console window). If so, generate a warning message
-/// advising against display of bitcode and return true. Otherwise just return
-/// false
+/// Determine if the raw_ostream provided is connected to a terminal. If so,
+/// generate a warning message to errs() advising against display of bitcode
+/// and return true. Otherwise just return false.
/// @brief Check for output written to a console
bool CheckBitcodeOutputToConsole(
raw_ostream &stream_to_check, ///< The stream to be checked
diff --git a/include/llvm/Support/TypeBuilder.h b/include/llvm/Support/TypeBuilder.h
index 270ac529c7e7..81c2747b6c05 100644
--- a/include/llvm/Support/TypeBuilder.h
+++ b/include/llvm/Support/TypeBuilder.h
@@ -73,7 +73,7 @@ namespace llvm {
///
/// TypeBuilder cannot handle recursive types or types you only know at runtime.
/// If you try to give it a recursive type, it will deadlock, infinitely
-/// recurse, or throw a recursive_init exception.
+/// recurse, or do something similarly undesirable.
template<typename T, bool cross_compilable> class TypeBuilder {};
// Types for use with cross-compilable TypeBuilders. These correspond
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index bb9a52330d7f..39bdbd804c27 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -58,10 +58,6 @@ private:
ExternalBuffer
} BufferMode;
- /// Error This flag is true if an error of any kind has been detected.
- ///
- bool Error;
-
public:
// color order matches ANSI escape sequence, don't change
enum Colors {
@@ -77,7 +73,7 @@ public:
};
explicit raw_ostream(bool unbuffered=false)
- : BufferMode(unbuffered ? Unbuffered : InternalBuffer), Error(false) {
+ : BufferMode(unbuffered ? Unbuffered : InternalBuffer) {
// Start out ready to flush.
OutBufStart = OutBufEnd = OutBufCur = 0;
}
@@ -87,21 +83,6 @@ public:
/// tell - Return the current offset with the file.
uint64_t tell() const { return current_pos() + GetNumBytesInBuffer(); }
- /// has_error - Return the value of the flag in this raw_ostream indicating
- /// whether an output error has been encountered.
- /// This doesn't implicitly flush any pending output.
- bool has_error() const {
- return Error;
- }
-
- /// clear_error - Set the flag read by has_error() to false. If the error
- /// flag is set at the time when this raw_ostream's destructor is called,
- /// report_fatal_error is called to report the error. Use clear_error()
- /// after handling the error to avoid this behavior.
- void clear_error() {
- Error = false;
- }
-
//===--------------------------------------------------------------------===//
// Configuration Interface
//===--------------------------------------------------------------------===//
@@ -234,7 +215,7 @@ public:
/// @param bold bold/brighter text, default false
/// @param bg if true change the background, default: change foreground
/// @returns itself so it can be used within << invocations
- virtual raw_ostream &changeColor(enum Colors, bool = false, bool = false) {
+ virtual raw_ostream &changeColor(enum Colors, bool = false, bool = false) {
return *this; }
/// Resets the colors to terminal defaults. Call this when you are done
@@ -285,10 +266,6 @@ protected:
/// underlying output mechanism.
virtual size_t preferred_buffer_size() const;
- /// error_detected - Set the flag indicating that an output error has
- /// been encountered.
- void error_detected() { Error = true; }
-
/// getBufferStart - Return the beginning of the current stream buffer, or 0
/// if the stream is unbuffered.
const char *getBufferStart() const { return OutBufStart; }
@@ -319,6 +296,11 @@ private:
class raw_fd_ostream : public raw_ostream {
int FD;
bool ShouldClose;
+
+ /// Error This flag is true if an error of any kind has been detected.
+ ///
+ bool Error;
+
uint64_t pos;
/// write_impl - See raw_ostream::write_impl.
@@ -331,6 +313,10 @@ class raw_fd_ostream : public raw_ostream {
/// preferred_buffer_size - Determine an efficient buffer size.
virtual size_t preferred_buffer_size() const;
+ /// error_detected - Set the flag indicating that an output error has
+ /// been encountered.
+ void error_detected() { Error = true; }
+
public:
enum {
@@ -353,8 +339,11 @@ public:
/// be immediately destroyed; the string will be empty if no error occurred.
/// This allows optional flags to control how the file will be opened.
///
- /// \param Filename - The file to open. If this is "-" then the
- /// stream will use stdout instead.
+ /// As a special case, if Filename is "-", then the stream will use
+ /// STDOUT_FILENO instead of opening a file. Note that it will still consider
+ /// itself to own the file descriptor. In particular, it will close the
+ /// file descriptor when it is done (this is necessary to detect
+ /// output errors).
raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
unsigned Flags = 0);
@@ -362,15 +351,17 @@ public:
/// ShouldClose is true, this closes the file when the stream is destroyed.
raw_fd_ostream(int fd, bool shouldClose,
bool unbuffered=false) : raw_ostream(unbuffered), FD(fd),
- ShouldClose(shouldClose) {}
+ ShouldClose(shouldClose),
+ Error(false) {}
~raw_fd_ostream();
/// close - Manually flush the stream and close the file.
+ /// Note that this does not call fsync.
void close();
/// seek - Flushes the stream and repositions the underlying file descriptor
- /// positition to the offset specified from the beginning of the file.
+ /// positition to the offset specified from the beginning of the file.
uint64_t seek(uint64_t off);
virtual raw_ostream &changeColor(enum Colors colors, bool bold=false,
@@ -378,24 +369,27 @@ public:
virtual raw_ostream &resetColor();
virtual bool is_displayed() const;
-};
-/// raw_stdout_ostream - This is a stream that always prints to stdout.
-///
-class raw_stdout_ostream : public raw_fd_ostream {
- // An out of line virtual method to provide a home for the class vtable.
- virtual void handle();
-public:
- raw_stdout_ostream();
-};
+ /// has_error - Return the value of the flag in this raw_fd_ostream indicating
+ /// whether an output error has been encountered.
+ /// This doesn't implicitly flush any pending output. Also, it doesn't
+ /// guarantee to detect all errors unless the the stream has been closed.
+ bool has_error() const {
+ return Error;
+ }
-/// raw_stderr_ostream - This is a stream that always prints to stderr.
-///
-class raw_stderr_ostream : public raw_fd_ostream {
- // An out of line virtual method to provide a home for the class vtable.
- virtual void handle();
-public:
- raw_stderr_ostream();
+ /// clear_error - Set the flag read by has_error() to false. If the error
+ /// flag is set at the time when this raw_ostream's destructor is called,
+ /// report_fatal_error is called to report the error. Use clear_error()
+ /// after handling the error to avoid this behavior.
+ ///
+ /// "Errors should never pass silently.
+ /// Unless explicitly silenced."
+ /// - from The Zen of Python, by Tim Peters
+ ///
+ void clear_error() {
+ Error = false;
+ }
};
/// outs() - This returns a reference to a raw_ostream for standard output.
@@ -461,7 +455,7 @@ public:
/// outside of the raw_svector_ostream's control. It is only safe to do this
/// if the raw_svector_ostream has previously been flushed.
void resync();
-
+
/// str - Flushes the stream contents to the target vector and return a
/// StringRef for the vector contents.
StringRef str();
@@ -481,6 +475,45 @@ public:
~raw_null_ostream();
};
+/// tool_output_file - This class contains a raw_fd_ostream and adds a
+/// few extra features commonly needed for compiler-like tool output files:
+/// - The file is automatically deleted if the process is killed.
+/// - The file is automatically deleted when the tool_output_file
+/// object is destroyed unless the client calls keep().
+class tool_output_file {
+ /// Installer - This class is declared before the raw_fd_ostream so that
+ /// it is constructed before the raw_fd_ostream is constructed and
+ /// destructed after the raw_fd_ostream is destructed. It installs
+ /// cleanups in its constructor and uninstalls them in its destructor.
+ class CleanupInstaller {
+ /// Filename - The name of the file.
+ std::string Filename;
+ public:
+ /// Keep - The flag which indicates whether we should not delete the file.
+ bool Keep;
+
+ explicit CleanupInstaller(const char *filename);
+ ~CleanupInstaller();
+ } Installer;
+
+ /// OS - The contained stream. This is intentionally declared after
+ /// Installer.
+ raw_fd_ostream OS;
+
+public:
+ /// tool_output_file - This constructor's arguments are passed to
+ /// to raw_fd_ostream's constructor.
+ tool_output_file(const char *filename, std::string &ErrorInfo,
+ unsigned Flags = 0);
+
+ /// os - Return the contained raw_fd_ostream.
+ raw_fd_ostream &os() { return OS; }
+
+ /// keep - Indicate that the tool's job wrt this output file has been
+ /// successful and the file should not be deleted.
+ void keep() { Installer.Keep = true; }
+};
+
} // end llvm namespace
#endif
diff --git a/include/llvm/System/Memory.h b/include/llvm/System/Memory.h
index 01bcab1f0070..2dd36e8ab147 100644
--- a/include/llvm/System/Memory.h
+++ b/include/llvm/System/Memory.h
@@ -63,7 +63,6 @@ namespace sys {
///
/// On success, this returns false, otherwise it returns true and fills
/// in *ErrMsg.
- /// @throws std::string if an error occurred.
/// @brief Release Read/Write/Execute memory.
static bool ReleaseRWX(MemoryBlock &block, std::string *ErrMsg = 0);
diff --git a/include/llvm/System/Path.h b/include/llvm/System/Path.h
index 0461769f9710..23b18d47145a 100644
--- a/include/llvm/System/Path.h
+++ b/include/llvm/System/Path.h
@@ -164,6 +164,7 @@ namespace sys {
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup and the address of main itself.
+ /// In extremis, this function may fail and return an empty path.
static Path GetMainExecutable(const char *argv0, void *MainAddr);
/// This is one of the very few ways in which a path can be constructed
@@ -336,9 +337,9 @@ namespace sys {
/// native Dynamic Library (shared library, shared object) by looking at
/// the file's magic number. The Path object must reference a file, not a
/// directory.
- /// @return strue if the file starts with the magid number for a native
+ /// @returns true if the file starts with the magic number for a native
/// shared library.
- /// @brief Determine if the path reference a dynamic library.
+ /// @brief Determine if the path references a dynamic library.
bool isDynamicLibrary() const;
/// This function determines if the path name references an existing file
diff --git a/include/llvm/System/Process.h b/include/llvm/System/Process.h
index 010499acd4bf..41bcd69b6a44 100644
--- a/include/llvm/System/Process.h
+++ b/include/llvm/System/Process.h
@@ -30,7 +30,6 @@ namespace sys {
/// This static function will return the operating system's virtual memory
/// page size.
/// @returns The number of bytes in a virtual memory page.
- /// @throws nothing
/// @brief Get the virtual memory page size
static unsigned GetPageSize();
@@ -38,7 +37,6 @@ namespace sys {
/// by the process. This only counts the memory allocated via the malloc,
/// calloc and realloc functions and includes any "free" holes in the
/// allocated space.
- /// @throws nothing
/// @brief Return process memory usage.
static size_t GetMallocUsage();
diff --git a/include/llvm/System/Program.h b/include/llvm/System/Program.h
index 69ce47892e14..7017305a2eb6 100644
--- a/include/llvm/System/Program.h
+++ b/include/llvm/System/Program.h
@@ -116,7 +116,6 @@ namespace sys {
/// locations to search (e.g. the PATH on Unix).
/// @returns A Path object initialized to the path of the program or a
/// Path object that is empty (invalid) if the program could not be found.
- /// @throws nothing
/// @brief Construct a Program by finding it by name.
static Path FindProgramByName(const std::string& name);
@@ -129,7 +128,6 @@ namespace sys {
/// A convenience function equivalent to Program prg; prg.Execute(..);
/// prg.Wait(..);
- /// @throws nothing
/// @see Execute, Wait
static int ExecuteAndWait(const Path& path,
const char** args,
@@ -140,7 +138,6 @@ namespace sys {
std::string* ErrMsg = 0);
/// A convenience function equivalent to Program prg; prg.Execute(..);
- /// @throws nothing
/// @see Execute
static void ExecuteNoWait(const Path& path,
const char** args,
diff --git a/include/llvm/System/Signals.h b/include/llvm/System/Signals.h
index 504420cd402d..7f1c87c3d55a 100644
--- a/include/llvm/System/Signals.h
+++ b/include/llvm/System/Signals.h
@@ -29,6 +29,10 @@ namespace sys {
/// @brief Remove a file if a fatal signal occurs.
bool RemoveFileOnSignal(const Path &Filename, std::string* ErrMsg = 0);
+ /// This function removes a file from the list of files to be removed on
+ /// signal delivery.
+ void DontRemoveFileOnSignal(const Path &Filename);
+
/// When an error signal (such as SIBABRT or SIGSEGV) is delivered to the
/// process, print a stack trace and then exit.
/// @brief Print a stack trace if a fatal signal occurs.
diff --git a/include/llvm/System/ThreadLocal.h b/include/llvm/System/ThreadLocal.h
index 39b1e64be0cd..e6edd79d6ff1 100644
--- a/include/llvm/System/ThreadLocal.h
+++ b/include/llvm/System/ThreadLocal.h
@@ -19,6 +19,8 @@
namespace llvm {
namespace sys {
+ // ThreadLocalImpl - Common base class of all ThreadLocal instantiations.
+ // YOU SHOULD NEVER USE THIS DIRECTLY.
class ThreadLocalImpl {
void* data;
public:
@@ -26,14 +28,25 @@ namespace llvm {
virtual ~ThreadLocalImpl();
void setInstance(const void* d);
const void* getInstance();
+ void removeInstance();
};
+ /// ThreadLocal - A class used to abstract thread-local storage. It holds,
+ /// for each thread, a pointer a single object of type T.
template<class T>
class ThreadLocal : public ThreadLocalImpl {
public:
ThreadLocal() : ThreadLocalImpl() { }
+
+ /// get - Fetches a pointer to the object associated with the current
+ /// thread. If no object has yet been associated, it returns NULL;
T* get() { return static_cast<T*>(getInstance()); }
+
+ // set - Associates a pointer to an object with the current thread.
void set(T* d) { setInstance(d); }
+
+ // erase - Removes the pointer associated with the current thread.
+ void erase() { removeInstance(); }
};
}
}
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index 9a89dc942d6c..b141a77df4f2 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -198,6 +198,7 @@ class Instruction {
bit isReturn = 0; // Is this instruction a return instruction?
bit isBranch = 0; // Is this instruction a branch instruction?
bit isIndirectBranch = 0; // Is this instruction an indirect branch?
+ bit isCompare = 0; // Is this instruction a comparison instruction?
bit isBarrier = 0; // Can control flow fall through this instruction?
bit isCall = 0; // Is this instruction a call instruction?
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
@@ -409,7 +410,7 @@ def INLINEASM : Instruction {
let InOperandList = (ins variable_ops);
let AsmString = "";
}
-def DBG_LABEL : Instruction {
+def PROLOG_LABEL : Instruction {
let OutOperandList = (outs);
let InOperandList = (ins i32imm:$id);
let AsmString = "";
@@ -510,10 +511,6 @@ class AsmParser {
// perform target specific instruction post-processing.
string AsmParserInstCleanup = "";
- // MatchInstructionName - The name of the instruction matching function to
- // generate.
- string MatchInstructionName = "MatchInstruction";
-
// Variant - AsmParsers can be of multiple different variants. Variants are
// used to support targets that need to parser multiple formats for the
// assembly language.
diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h
index f431c38dc3a5..5830d1f99f5c 100644
--- a/include/llvm/Target/TargetAsmParser.h
+++ b/include/llvm/Target/TargetAsmParser.h
@@ -28,14 +28,20 @@ class TargetAsmParser : public MCAsmParserExtension {
protected: // Can only create subclasses.
TargetAsmParser(const Target &);
- /// TheTarget - The Target that this machine was created for.
+ /// The Target that this machine was created for.
const Target &TheTarget;
+ /// The current set of available features.
+ unsigned AvailableFeatures;
+
public:
virtual ~TargetAsmParser();
const Target &getTarget() const { return TheTarget; }
+ unsigned getAvailableFeatures() const { return AvailableFeatures; }
+ void setAvailableFeatures(unsigned Value) { AvailableFeatures = Value; }
+
/// ParseInstruction - Parse one assembly instruction.
///
/// The parser is positioned following the instruction name. The target
@@ -67,8 +73,12 @@ public:
/// MatchInstruction - Recognize a series of operands of a parsed instruction
/// as an actual MCInst. This returns false and fills in Inst on success and
/// returns true on failure to match.
+ ///
+ /// On failure, the target parser is responsible for emitting a diagnostic
+ /// explaining the match failure.
virtual bool
- MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCInst &Inst) = 0;
};
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index ceaeb0b5038b..6da3ba13bb35 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -42,7 +42,7 @@ class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
}
-/// CCIfCC - Match of the current calling convention is 'CC'.
+/// CCIfCC - Match if the current calling convention is 'CC'.
class CCIfCC<string CC, CCAction A>
: CCIf<!strconcat("State.getCallingConv() == ", CC), A> {}
@@ -89,6 +89,13 @@ class CCAssignToStack<int size, int align> : CCAction {
int Align = align;
}
+/// CCAssignToStackWithShadow - Same as CCAssignToStack, but with a register
+/// to be shadowed.
+class CCAssignToStackWithShadow<int size, int align, Register reg> :
+ CCAssignToStack<size, align> {
+ Register ShadowReg = reg;
+}
+
/// CCPassByVal - This action always matches: it assigns the value to a stack
/// slot to implement ByVal aggregate parameter passing. Size and alignment
/// specify the minimum size and alignment for the stack slot.
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index cc88dae9fa68..b89cbe0133f8 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -50,13 +50,13 @@ enum AlignTypeEnum {
/// padding and make the structure slightly more cache friendly.
struct TargetAlignElem {
AlignTypeEnum AlignType : 8; //< Alignment type (AlignTypeEnum)
- unsigned char ABIAlign; //< ABI alignment for this type/bitw
- unsigned char PrefAlign; //< Pref. alignment for this type/bitw
+ unsigned ABIAlign; //< ABI alignment for this type/bitw
+ unsigned PrefAlign; //< Pref. alignment for this type/bitw
uint32_t TypeBitWidth; //< Type bit width
/// Initializer
- static TargetAlignElem get(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width);
+ static TargetAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width);
/// Equality predicate
bool operator==(const TargetAlignElem &rhs) const;
};
@@ -64,9 +64,9 @@ struct TargetAlignElem {
class TargetData : public ImmutablePass {
private:
bool LittleEndian; ///< Defaults to false
- unsigned char PointerMemSize; ///< Pointer size in bytes
- unsigned char PointerABIAlign; ///< Pointer ABI alignment
- unsigned char PointerPrefAlign; ///< Pointer preferred alignment
+ unsigned PointerMemSize; ///< Pointer size in bytes
+ unsigned PointerABIAlign; ///< Pointer ABI alignment
+ unsigned PointerPrefAlign; ///< Pointer preferred alignment
SmallVector<unsigned char, 8> LegalIntWidths; ///< Legal Integers.
@@ -86,12 +86,12 @@ private:
mutable void *LayoutMap;
//! Set/initialize target alignments
- void setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width);
+ void setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width);
unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
bool ABIAlign, const Type *Ty) const;
//! Internal helper method that returns requested alignment for type.
- unsigned char getAlignment(const Type *Ty, bool abi_or_pref) const;
+ unsigned getAlignment(const Type *Ty, bool abi_or_pref) const;
/// Valid alignment predicate.
///
@@ -110,7 +110,7 @@ public:
/// Constructs a TargetData from a specification string. See init().
explicit TargetData(StringRef TargetDescription)
- : ImmutablePass(&ID) {
+ : ImmutablePass(ID) {
init(TargetDescription);
}
@@ -118,7 +118,7 @@ public:
explicit TargetData(const Module *M);
TargetData(const TargetData &TD) :
- ImmutablePass(&ID),
+ ImmutablePass(ID),
LittleEndian(TD.isLittleEndian()),
PointerMemSize(TD.PointerMemSize),
PointerABIAlign(TD.PointerABIAlign),
@@ -161,13 +161,13 @@ public:
}
/// Target pointer alignment
- unsigned char getPointerABIAlignment() const { return PointerABIAlign; }
+ unsigned getPointerABIAlignment() const { return PointerABIAlign; }
/// Return target's alignment for stack-based pointers
- unsigned char getPointerPrefAlignment() const { return PointerPrefAlign; }
+ unsigned getPointerPrefAlignment() const { return PointerPrefAlign; }
/// Target pointer size
- unsigned char getPointerSize() const { return PointerMemSize; }
+ unsigned getPointerSize() const { return PointerMemSize; }
/// Target pointer size, in bits
- unsigned char getPointerSizeInBits() const { return 8*PointerMemSize; }
+ unsigned getPointerSizeInBits() const { return 8*PointerMemSize; }
/// Size examples:
///
@@ -223,26 +223,26 @@ public:
/// getABITypeAlignment - Return the minimum ABI-required alignment for the
/// specified type.
- unsigned char getABITypeAlignment(const Type *Ty) const;
+ unsigned getABITypeAlignment(const Type *Ty) const;
/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
/// an integer type of the specified bitwidth.
- unsigned char getABIIntegerTypeAlignment(unsigned BitWidth) const;
+ unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
/// getCallFrameTypeAlignment - Return the minimum ABI-required alignment
/// for the specified type when it is part of a call frame.
- unsigned char getCallFrameTypeAlignment(const Type *Ty) const;
+ unsigned getCallFrameTypeAlignment(const Type *Ty) const;
/// getPrefTypeAlignment - Return the preferred stack/global alignment for
/// the specified type. This is always at least as good as the ABI alignment.
- unsigned char getPrefTypeAlignment(const Type *Ty) const;
+ unsigned getPrefTypeAlignment(const Type *Ty) const;
/// getPreferredTypeAlignmentShift - Return the preferred alignment for the
/// specified type, returned as log2 of the value (a shift amount).
///
- unsigned char getPreferredTypeAlignmentShift(const Type *Ty) const;
+ unsigned getPreferredTypeAlignmentShift(const Type *Ty) const;
/// getIntPtrType - Return an unsigned integer type that is the same size or
/// greater to the host pointer size.
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h
index 8f0a6cb1a68e..a127aed8f6df 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/Target/TargetInstrDesc.h
@@ -105,6 +105,7 @@ namespace TID {
IndirectBranch,
Predicable,
NotDuplicable,
+ Compare,
DelaySlot,
FoldableAsLoad,
MayLoad,
@@ -151,6 +152,12 @@ public:
return -1;
}
+ /// getRegClass - Returns the register class constraint for OpNum, or NULL.
+ const TargetRegisterClass *getRegClass(unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ return OpNum < NumOperands ? OpInfo[OpNum].getRegClass(TRI) : 0;
+ }
+
/// getOpcode - Return the opcode number for this descriptor.
unsigned getOpcode() const {
return Opcode;
@@ -315,7 +322,7 @@ public:
bool isIndirectBranch() const {
return Flags & (1 << TID::IndirectBranch);
}
-
+
/// isConditionalBranch - Return true if this is a branch which may fall
/// through to the next instruction or may transfer control flow to some other
/// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more
@@ -340,6 +347,11 @@ public:
return Flags & (1 << TID::Predicable);
}
+ /// isCompare - Return true if this instruction is a comparison.
+ bool isCompare() const {
+ return Flags & (1 << TID::Compare);
+ }
+
/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index e42be2676cd2..520c41be7428 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -92,15 +92,6 @@ private:
AliasAnalysis *AA) const;
public:
- /// isMoveInstr - Return true if the instruction is a register to register
- /// move and return the source and dest operands and their sub-register
- /// indices by reference.
- virtual bool isMoveInstr(const MachineInstr& MI,
- unsigned& SrcReg, unsigned& DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- return false;
- }
-
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
/// extension instruction. That is, it's like a copy where it's legal for the
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
@@ -113,22 +104,6 @@ public:
return false;
}
- /// isIdentityCopy - Return true if the instruction is a copy (or
- /// extract_subreg, insert_subreg, subreg_to_reg) where the source and
- /// destination registers are the same.
- bool isIdentityCopy(const MachineInstr &MI) const {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == DstReg)
- return true;
-
- if ((MI.getOpcode() == TargetOpcode::INSERT_SUBREG ||
- MI.getOpcode() == TargetOpcode::SUBREG_TO_REG) &&
- MI.getOperand(0).getReg() == MI.getOperand(2).getReg())
- return true;
- return false;
- }
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
@@ -591,18 +566,6 @@ public:
const MachineBasicBlock *MBB,
const MachineFunction &MF) const = 0;
- /// GetInstSize - Returns the size of the specified Instruction.
- ///
- virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const {
- assert(0 && "Target didn't implement TargetInstrInfo::GetInstSize!");
- return 0;
- }
-
- /// GetFunctionSizeInBytes - Returns the size of the specified
- /// MachineFunction.
- ///
- virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const = 0;
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
virtual unsigned getInlineAsmLength(const char *Str,
@@ -613,6 +576,21 @@ public:
/// register allocation.
virtual ScheduleHazardRecognizer*
CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const = 0;
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, int &CmpValue) const {
+ return false;
+ }
+
+ /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
+ MachineInstr *CmpInstr) const {
+ return false;
+ }
};
/// TargetInstrInfoImpl - This is the default implementation of
@@ -646,7 +624,6 @@ public:
virtual bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const;
- virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const;
virtual ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 2b6e4fa8526d..29de994a21c9 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -168,6 +168,32 @@ public:
return RC;
}
+ /// getRepRegClassFor - Return the 'representative' register class for the
+ /// specified value type. The 'representative' register class is the largest
+ /// legal super-reg register class for the register class of the value type.
+ /// For example, on i386 the rep register class for i8, i16, and i32 are GR32;
+ /// while the rep register class is GR64 on x86_64.
+ virtual const TargetRegisterClass *getRepRegClassFor(EVT VT) const {
+ assert(VT.isSimple() && "getRepRegClassFor called on illegal type!");
+ const TargetRegisterClass *RC = RepRegClassForVT[VT.getSimpleVT().SimpleTy];
+ return RC;
+ }
+
+ /// getRepRegClassCostFor - Return the cost of the 'representative' register
+ /// class for the specified value type.
+ virtual uint8_t getRepRegClassCostFor(EVT VT) const {
+ assert(VT.isSimple() && "getRepRegClassCostFor called on illegal type!");
+ return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy];
+ }
+
+ /// getRegPressureLimit - Return the register pressure "high water mark" for
+ /// the specific register class. The scheduler is in high register pressure
+ /// mode (for the specific register class) if it goes over the limit.
+ virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ return 0;
+ }
+
/// isTypeLegal - Return true if the target has native support for the
/// specified value type. This means that it has a register that directly
/// holds it without promotions or expansions.
@@ -188,24 +214,53 @@ public:
/// ValueTypeActions - For each value type, keep a LegalizeAction enum
/// that indicates how instruction selection should deal with the type.
uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
+
+ LegalizeAction getExtendedTypeAction(EVT VT) const {
+ // Handle non-vector integers.
+ if (!VT.isVector()) {
+ assert(VT.isInteger() && "Unsupported extended type!");
+ unsigned BitSize = VT.getSizeInBits();
+ // First promote to a power-of-two size, then expand if necessary.
+ if (BitSize < 8 || !isPowerOf2_32(BitSize))
+ return Promote;
+ return Expand;
+ }
+
+ // If this is a type smaller than a legal vector type, promote to that
+ // type, e.g. <2 x float> -> <4 x float>.
+ if (VT.getVectorElementType().isSimple() &&
+ VT.getVectorNumElements() != 1) {
+ MVT EltType = VT.getVectorElementType().getSimpleVT();
+ unsigned NumElts = VT.getVectorNumElements();
+ while (1) {
+ // Round up to the nearest power of 2.
+ NumElts = (unsigned)NextPowerOf2(NumElts);
+
+ MVT LargerVector = MVT::getVectorVT(EltType, NumElts);
+ if (LargerVector == MVT()) break;
+
+ // If this the larger type is legal, promote to it.
+ if (getTypeAction(LargerVector) == Legal) return Promote;
+ }
+ }
+
+ return VT.isPow2VectorType() ? Expand : Promote;
+ }
public:
ValueTypeActionImpl() {
std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
}
- LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
- if (VT.isExtended()) {
- if (VT.isVector()) {
- return VT.isPow2VectorType() ? Expand : Promote;
- }
- if (VT.isInteger())
- // First promote to a power-of-two size, then expand if necessary.
- return VT == VT.getRoundIntegerType(Context) ? Expand : Promote;
- assert(0 && "Unsupported extended type!");
- return Legal;
- }
- unsigned I = VT.getSimpleVT().SimpleTy;
- return (LegalizeAction)ValueTypeActions[I];
+
+ LegalizeAction getTypeAction(EVT VT) const {
+ if (!VT.isExtended())
+ return getTypeAction(VT.getSimpleVT());
+ return getExtendedTypeAction(VT);
}
+
+ LegalizeAction getTypeAction(MVT VT) const {
+ return (LegalizeAction)ValueTypeActions[VT.SimpleTy];
+ }
+
void setTypeAction(EVT VT, LegalizeAction Action) {
unsigned I = VT.getSimpleVT().SimpleTy;
ValueTypeActions[I] = Action;
@@ -220,10 +275,13 @@ public:
/// it is already legal (return 'Legal') or we need to promote it to a larger
/// type (return 'Promote'), or we need to expand it into multiple registers
/// of smaller integer type (return 'Expand'). 'Custom' is not an option.
- LegalizeAction getTypeAction(LLVMContext &Context, EVT VT) const {
- return ValueTypeActions.getTypeAction(Context, VT);
+ LegalizeAction getTypeAction(EVT VT) const {
+ return ValueTypeActions.getTypeAction(VT);
}
-
+ LegalizeAction getTypeAction(MVT VT) const {
+ return ValueTypeActions.getTypeAction(VT);
+ }
+
/// getTypeToTransformTo - For types supported by the target, this is an
/// identity function. For types that must be promoted to larger types, this
/// returns the larger type to promote to. For integer types that are larger
@@ -235,7 +293,7 @@ public:
assert((unsigned)VT.getSimpleVT().SimpleTy <
array_lengthof(TransformToType));
EVT NVT = TransformToType[VT.getSimpleVT().SimpleTy];
- assert(getTypeAction(Context, NVT) != Promote &&
+ assert(getTypeAction(NVT) != Promote &&
"Promote may not follow Expand or Promote");
return NVT;
}
@@ -250,17 +308,16 @@ public:
EltVT : EVT::getVectorVT(Context, EltVT, NumElts / 2);
}
// Promote to a power of two size, avoiding multi-step promotion.
- return getTypeAction(Context, NVT) == Promote ?
+ return getTypeAction(NVT) == Promote ?
getTypeToTransformTo(Context, NVT) : NVT;
} else if (VT.isInteger()) {
EVT NVT = VT.getRoundIntegerType(Context);
- if (NVT == VT)
- // Size is a power of two - expand to half the size.
+ if (NVT == VT) // Size is a power of two - expand to half the size.
return EVT::getIntegerVT(Context, VT.getSizeInBits() / 2);
- else
- // Promote to a power of two size, avoiding multi-step promotion.
- return getTypeAction(Context, NVT) == Promote ?
- getTypeToTransformTo(Context, NVT) : NVT;
+
+ // Promote to a power of two size, avoiding multi-step promotion.
+ return getTypeAction(NVT) == Promote ?
+ getTypeToTransformTo(Context, NVT) : NVT;
}
assert(0 && "Unsupported extended type!");
return MVT(MVT::Other); // Not reached
@@ -273,7 +330,7 @@ public:
EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
assert(!VT.isVector());
while (true) {
- switch (getTypeAction(Context, VT)) {
+ switch (getTypeAction(VT)) {
case Legal:
return VT;
case Expand:
@@ -766,6 +823,12 @@ public:
return false;
}
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can be
+ /// used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const {
+ return 0;
+ }
+
//===--------------------------------------------------------------------===//
// TargetLowering Optimization Methods
//
@@ -981,6 +1044,11 @@ protected:
Synthesizable[VT.getSimpleVT().SimpleTy] = isSynthesizable;
}
+ /// findRepresentativeClass - Return the largest legal super-reg register class
+ /// of the register class for the specified type and its associated "cost".
+ virtual std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void computeRegisterProperties();
@@ -1562,6 +1630,19 @@ private:
unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
EVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
+ /// RepRegClassForVT - This indicates the "representative" register class to
+ /// use for each ValueType the target supports natively. This information is
+ /// used by the scheduler to track register pressure. By default, the
+ /// representative register class is the largest legal super-reg register
+ /// class of the register class of the specified type. e.g. On x86, i8, i16,
+ /// and i32's representative class would be GR32.
+ const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE];
+
+ /// RepRegClassCostForVT - This indicates the "cost" of the "representative"
+ /// register class for each ValueType. The cost is used by the scheduler to
+ /// approximate register pressure.
+ uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
+
/// Synthesizable indicates whether it is OK for the compiler to create new
/// operations using this type. All Legal types are Synthesizable except
/// MMX types on X86. Non-Legal types are not Synthesizable.
@@ -1672,6 +1753,15 @@ protected:
/// This field specifies whether the target can benefit from code placement
/// optimization.
bool benefitFromCodePlacementOpt;
+
+private:
+ /// isLegalRC - Return true if the value types that can be represented by the
+ /// specified register class are all legal.
+ bool isLegalRC(const TargetRegisterClass *RC) const;
+
+ /// hasLegalSuperRegRegClasses - Return true if the specified register class
+ /// has one or more super-reg register classes that are legal.
+ bool hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const;
};
/// GetReturnInfo - Given an LLVM IR type and return type attributes,
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 227499b37055..42e99e015644 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -75,7 +75,8 @@ namespace Sched {
None, // No preference
Latency, // Scheduling for shortest total latency.
RegPressure, // Scheduling for lowest register pressure.
- Hybrid // Scheduling for both latency and register pressure.
+ Hybrid, // Scheduling for both latency and register pressure.
+ ILP // Scheduling for ILP in low register pressure mode.
};
}
@@ -244,6 +245,18 @@ public:
bool = true) {
return true;
}
+
+ /// addPassesToEmitMC - Add passes to the specified pass manager to get
+ /// machine code emitted with the MCJIT. This method returns true if machine
+ /// code is not supported. It fills the MCContext Ctx pointer which can be
+ /// used to build custom MCStreamer.
+ ///
+ virtual bool addPassesToEmitMC(PassManagerBase &,
+ MCContext *&,
+ CodeGenOpt::Level,
+ bool = true) {
+ return true;
+ }
};
/// LLVMTargetMachine - This class describes a target machine that is
@@ -287,12 +300,27 @@ public:
JITCodeEmitter &MCE,
CodeGenOpt::Level,
bool DisableVerify = true);
+
+ /// addPassesToEmitMC - Add passes to the specified pass manager to get
+ /// machine code emitted with the MCJIT. This method returns true if machine
+ /// code is not supported. It fills the MCContext Ctx pointer which can be
+ /// used to build custom MCStreamer.
+ ///
+ virtual bool addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify = true);
/// Target-Independent Code Generator Pass Configuration Options.
-
- /// addInstSelector - This method should add any "last minute" LLVM->LLVM
- /// passes, then install an instruction selector pass, which converts from
- /// LLVM code to machine instructions.
+
+ /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM
+ /// passes (which are run just before instruction selector).
+ virtual bool addPreISel(PassManagerBase &, CodeGenOpt::Level) {
+ return true;
+ }
+
+ /// addInstSelector - This method should install an instruction selector pass,
+ /// which converts from LLVM code to machine instructions.
virtual bool addInstSelector(PassManagerBase &, CodeGenOpt::Level) {
return true;
}
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index cb772ecd77de..01fba6628eff 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -25,7 +25,7 @@ namespace TargetOpcode {
enum {
PHI = 0,
INLINEASM = 1,
- DBG_LABEL = 2,
+ PROLOG_LABEL = 2,
EH_LABEL = 3,
GC_LABEL = 4,
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index b36988097cfc..97ceffdaecb8 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -71,13 +71,18 @@ namespace llvm {
/// UnsafeFPMath implies LessPreciseFPMAD.
extern bool UnsafeFPMath;
- /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
- /// option is specified on the command line. If this returns false (default),
- /// the code generator is not allowed to assume that FP arithmetic arguments
- /// and results are never NaNs or +-Infs.
- extern bool FiniteOnlyFPMathOption;
- extern bool FiniteOnlyFPMath();
-
+ /// NoInfsFPMath - This flag is enabled when the
+ /// -enable-no-infs-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// assume the FP arithmetic arguments and results are never +-Infs.
+ extern bool NoInfsFPMath;
+
+ /// NoNaNsFPMath - This flag is enabled when the
+ /// -enable-no-nans-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// assume the FP arithmetic arguments and results are never NaNs.
+ extern bool NoNaNsFPMath;
+
/// HonorSignDependentRoundingFPMath - This returns true when the
/// -enable-sign-dependent-rounding-fp-math is specified. If this returns
/// false (the default), the code generator is allowed to assume that the
@@ -135,8 +140,8 @@ namespace llvm {
/// StackAlignment - Override default stack alignment for target.
extern unsigned StackAlignment;
- /// RealignStack - This flag indicates, whether stack should be automatically
- /// realigned, if needed.
+ /// RealignStack - This flag indicates whether the stack should be
+ /// automatically realigned, if needed.
extern bool RealignStack;
/// DisableJumpTables - This flag indicates jump tables should not be
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index f6ac2b7b1a8f..81dec3e5b78d 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -301,7 +301,7 @@ public:
/// considered to be a 'virtual' register, which is part of the SSA
/// namespace. This must be the same for all targets, which means that each
/// target is limited to this fixed number of registers.
- FirstVirtualRegister = 1024
+ FirstVirtualRegister = 16384
};
/// isPhysicalRegister - Return true if the specified register number is in
@@ -593,6 +593,13 @@ public:
return false;
}
+ /// requiresVirtualBaseRegisters - Returns true if the target wants the
+ /// LocalStackAllocation pass to be run and virtual base registers
+ /// used for more efficient stack access.
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return false;
+ }
+
/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register. For most targets this is true only if the function
/// has variable sized allocas or if frame pointer elimination is disabled.
@@ -603,18 +610,18 @@ public:
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
- virtual bool hasReservedCallFrame(MachineFunction &MF) const {
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const {
return !hasFP(MF);
}
/// canSimplifyCallFramePseudos - When possible, it's best to simplify the
/// call frame pseudo ops before doing frame index elimination. This is
/// possible only when frame index references between the pseudos won't
- /// need adjusted for the call frame adjustments. Normally, that's true
+ /// need adjusting for the call frame adjustments. Normally, that's true
/// if the function has a reserved call frame or a frame pointer. Some
/// targets (Thumb2, for example) may have more complicated criteria,
/// however, and can override this behavior.
- virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const {
+ virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || hasFP(MF);
}
@@ -624,7 +631,7 @@ public:
/// reserved as its spill slot. This tells PEI not to create a new stack frame
/// object for the given register. It should be called only after
/// processFunctionBeforeCalleeSavedScan().
- virtual bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const {
return false;
}
@@ -636,6 +643,44 @@ public:
return false;
}
+ /// getFrameIndexInstrOffset - Get the offset from the referenced frame
+ /// index in the instruction, if the is one.
+ virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
+ int Idx) const {
+ return 0;
+ }
+
+ /// needsFrameBaseReg - Returns true if the instruction's frame index
+ /// reference would be better served by a base register other than FP
+ /// or SP. Used by LocalStackFrameAllocation to determine which frame index
+ /// references it should create new base registers for.
+ virtual bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ return false;
+ }
+
+ /// materializeFrameBaseRegister - Insert defining instruction(s) for
+ /// BaseReg to be a pointer to FrameIdx before insertion point I.
+ virtual void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ assert(0 && "materializeFrameBaseRegister does not exist on this target");
+ }
+
+ /// resolveFrameIndex - Resolve a frame index operand of an instruction
+ /// to reference the indicated base register plus offset instead.
+ virtual void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ assert(0 && "resolveFrameIndex does not exist on this target");
+ }
+
+ /// isFrameOffsetLegal - Determine whether a given offset immediate is
+ /// encodable to resolve a frame index.
+ virtual bool isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ assert(0 && "isFrameOffsetLegal does not exist on this target");
+ return false; // Must return a value in order to compile with VS 2005
+ }
+
/// getCallFrameSetup/DestroyOpcode - These methods return the opcode of the
/// frame setup/destroy instructions if they exist (-1 otherwise). Some
/// targets use pseudo instructions in order to abstract away the difference
@@ -671,7 +716,7 @@ public:
}
/// processFunctionBeforeFrameFinalized - This method is called immediately
- /// before the specified functions frame layout (MF.getFrameInfo()) is
+ /// before the specified function's frame layout (MF.getFrameInfo()) is
/// finalized. Once the frame is finalized, MO_FrameIndex operands are
/// replaced with direct constants. This method is optional.
///
@@ -698,14 +743,8 @@ public:
/// specified instruction, as long as it keeps the iterator pointing at the
/// finished product. SPAdj is the SP adjustment due to call frame setup
/// instruction.
- ///
- /// When -enable-frame-index-scavenging is enabled, the virtual register
- /// allocated for this frame index is returned and its value is stored in
- /// *Value.
- typedef std::pair<unsigned, int> FrameIndexValue;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS=NULL) const = 0;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS=NULL) const = 0;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
index 1418bee00ff3..2817b0c421ed 100644
--- a/include/llvm/Target/TargetRegistry.h
+++ b/include/llvm/Target/TargetRegistry.h
@@ -65,7 +65,8 @@ namespace llvm {
const std::string &TT);
typedef TargetAsmLexer *(*AsmLexerCtorTy)(const Target &T,
const MCAsmInfo &MAI);
- typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T,MCAsmParser &P);
+ typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T,MCAsmParser &P,
+ TargetMachine &TM);
typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T);
typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T,
unsigned SyntaxVariant,
@@ -237,10 +238,11 @@ namespace llvm {
///
/// \arg Parser - The target independent parser implementation to use for
/// parsing and lexing.
- TargetAsmParser *createAsmParser(MCAsmParser &Parser) const {
+ TargetAsmParser *createAsmParser(MCAsmParser &Parser,
+ TargetMachine &TM) const {
if (!AsmParserCtorFn)
return 0;
- return AsmParserCtorFn(*this, Parser);
+ return AsmParserCtorFn(*this, Parser, TM);
}
/// createAsmPrinter - Create a target specific assembly printer pass. This
@@ -276,9 +278,9 @@ namespace llvm {
///
/// \arg TT - The target triple.
/// \arg Ctx - The target context.
- /// \arg TAB - The target assembler backend object.
+ /// \arg TAB - The target assembler backend object. Takes ownership.
/// \arg _OS - The stream object.
- /// \arg _Emitter - The target independent assembler object.
+ /// \arg _Emitter - The target independent assembler object.Takes ownership.
/// \arg RelaxAll - Relax all fixups?
MCStreamer *createObjectStreamer(const std::string &TT, MCContext &Ctx,
TargetAsmBackend &TAB,
@@ -667,8 +669,9 @@ namespace llvm {
}
private:
- static TargetAsmParser *Allocator(const Target &T, MCAsmParser &P) {
- return new AsmParserImpl(T, P);
+ static TargetAsmParser *Allocator(const Target &T, MCAsmParser &P,
+ TargetMachine &TM) {
+ return new AsmParserImpl(T, P, TM);
}
};
diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h
index 951e7fa86b5d..1891f879741a 100644
--- a/include/llvm/Target/TargetSelect.h
+++ b/include/llvm/Target/TargetSelect.h
@@ -16,7 +16,7 @@
#ifndef LLVM_TARGET_TARGETSELECT_H
#define LLVM_TARGET_TARGETSELECT_H
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
extern "C" {
// Declare all of the target-initialization functions that are available.
@@ -100,15 +100,22 @@ namespace llvm {
/// It is legal for a client to make multiple calls to this function.
inline bool InitializeNativeTarget() {
// If we have a native target, initialize it to ensure it is linked in.
-#ifdef LLVM_NATIVE_ARCH
-#define DoInit2(TARG) \
- LLVMInitialize ## TARG ## Info (); \
- LLVMInitialize ## TARG ()
-#define DoInit(T) DoInit2(T)
- DoInit(LLVM_NATIVE_ARCH);
+#ifdef LLVM_NATIVE_TARGET
+ LLVM_NATIVE_TARGETINFO();
+ LLVM_NATIVE_TARGET();
+ return false;
+#else
+ return true;
+#endif
+ }
+
+ /// InitializeNativeTargetAsmPrinter - The main program should call
+ /// this function to initialize the native target asm printer.
+ inline bool InitializeNativeTargetAsmPrinter() {
+ // If we have a native target, initialize the corresponding asm printer.
+#ifdef LLVM_NATIVE_ASMPRINTER
+ LLVM_NATIVE_ASMPRINTER();
return false;
-#undef DoInit
-#undef DoInit2
#else
return true;
#endif
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 8fb4b63c6463..0de100348d0f 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -93,8 +93,7 @@ ModulePass *createGlobalDCEPass();
/// possible, except for the global values specified.
///
ModulePass *createGVExtractionPass(std::vector<GlobalValue*>& GVs, bool
- deleteFn = false,
- bool relinkCallees = false);
+ deleteFn = false);
//===----------------------------------------------------------------------===//
/// createFunctionInliningPass - Return a new pass object that uses a heuristic
@@ -181,7 +180,7 @@ Pass *createSingleLoopExtractorPass();
/// createBlockExtractorPass - This pass extracts all blocks (except those
/// specified in the argument list) from the functions in the module.
///
-ModulePass *createBlockExtractorPass(const std::vector<BasicBlock*> &BTNE);
+ModulePass *createBlockExtractorPass();
/// createStripDeadPrototypesPass - This pass removes any function declarations
/// (prototypes) that are not used.
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index 6af7ed7bdbfc..3ac4c591c94f 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -30,8 +30,8 @@ namespace llvm {
/// perform the inlining operations that do not depend on the policy.
///
struct Inliner : public CallGraphSCCPass {
- explicit Inliner(void *ID);
- explicit Inliner(void *ID, int Threshold);
+ explicit Inliner(char &ID);
+ explicit Inliner(char &ID, int Threshold);
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 0d338b5bc13f..0c35d7e01fa4 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -149,7 +149,6 @@ Pass *createLoopIndexSplitPass();
// ret i32 %Y
//
FunctionPass *createPromoteMemoryToRegisterPass();
-extern const PassInfo *const PromoteMemoryToRegisterID;
//===----------------------------------------------------------------------===//
//
@@ -158,7 +157,7 @@ extern const PassInfo *const PromoteMemoryToRegisterID;
// hacking easier.
//
FunctionPass *createDemoteRegisterToMemoryPass();
-extern const PassInfo *const DemoteRegisterToMemoryID;
+extern char &DemoteRegisterToMemoryID;
//===----------------------------------------------------------------------===//
//
@@ -202,7 +201,7 @@ FunctionPass *createCFGSimplificationPass();
// (set, immediate dominators, tree, and frontier) information.
//
FunctionPass *createBreakCriticalEdgesPass();
-extern const PassInfo *const BreakCriticalEdgesID;
+extern char &BreakCriticalEdgesID;
//===----------------------------------------------------------------------===//
//
@@ -213,7 +212,7 @@ extern const PassInfo *const BreakCriticalEdgesID;
// AU.addRequiredID(LoopSimplifyID);
//
Pass *createLoopSimplifyPass();
-extern const PassInfo *const LoopSimplifyID;
+extern char &LoopSimplifyID;
//===----------------------------------------------------------------------===//
//
@@ -228,7 +227,7 @@ FunctionPass *createTailCallEliminationPass();
// chained binary branch instructions.
//
FunctionPass *createLowerSwitchPass();
-extern const PassInfo *const LowerSwitchID;
+extern char &LowerSwitchID;
//===----------------------------------------------------------------------===//
//
@@ -243,7 +242,7 @@ extern const PassInfo *const LowerSwitchID;
FunctionPass *createLowerInvokePass(const TargetLowering *TLI = 0);
FunctionPass *createLowerInvokePass(const TargetLowering *TLI,
bool useExpensiveEHSupport);
-extern const PassInfo *const LowerInvokePassID;
+extern char &LowerInvokePassID;
//===----------------------------------------------------------------------===//
//
@@ -258,7 +257,7 @@ FunctionPass *createBlockPlacementPass();
// optimizations.
//
Pass *createLCSSAPass();
-extern const PassInfo *const LCSSAID;
+extern char &LCSSAID;
//===----------------------------------------------------------------------===//
//
@@ -304,39 +303,31 @@ FunctionPass *createCodeGenPreparePass(const TargetLowering *TLI = 0);
// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
//
FunctionPass *createInstructionNamerPass();
-extern const PassInfo *const InstructionNamerID;
+extern char &InstructionNamerID;
//===----------------------------------------------------------------------===//
//
-// SSI - This pass converts instructions to Static Single Information form
-// on demand.
-//
-FunctionPass *createSSIPass();
-
-//===----------------------------------------------------------------------===//
-//
-// SSI - This pass converts every non-void instuction to Static Single
-// Information form.
+// GEPSplitter - Split complex GEPs into simple ones
//
-FunctionPass *createSSIEverythingPass();
+FunctionPass *createGEPSplitterPass();
//===----------------------------------------------------------------------===//
//
-// GEPSplitter - Split complex GEPs into simple ones
+// Sink - Code Sinking
//
-FunctionPass *createGEPSplitterPass();
+FunctionPass *createSinkingPass();
//===----------------------------------------------------------------------===//
//
-// ABCD - Elimination of Array Bounds Checks on Demand
+// LowerAtomic - Lower atomic intrinsics to non-atomic form
//
-FunctionPass *createABCDPass();
+Pass *createLowerAtomicPass();
//===----------------------------------------------------------------------===//
//
-// Sink - Code Sinking
+// ValuePropagation - Propagate CFG-derived value information
//
-FunctionPass *createSinkingPass();
+Pass *createCorrelatedValuePropagationPass();
} // End llvm namespace
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 1ca4981ccbbe..62bf92aced49 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -121,8 +121,12 @@ Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI,
/// the function from their old to new values. The final argument captures
/// information about the cloned code if non-null.
///
+/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
+/// mappings.
+///
Function *CloneFunction(const Function *F,
ValueMap<const Value*, Value*> &VMap,
+ bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo = 0);
/// CloneFunction - Version of the function that doesn't need the VMap.
@@ -133,13 +137,17 @@ inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){
}
/// Clone OldFunc into NewFunc, transforming the old arguments into references
-/// to ArgMap values. Note that if NewFunc already has basic blocks, the ones
+/// to VMap values. Note that if NewFunc already has basic blocks, the ones
/// cloned into it will be added to the end of the function. This function
/// fills in a list of return instructions, and can optionally append the
/// specified suffix to all values cloned.
///
+/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
+/// mappings.
+///
void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueMap<const Value*, Value*> &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = 0);
@@ -151,8 +159,13 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
/// constant arguments cause a significant amount of code in the callee to be
/// dead. Since this doesn't produce an exactly copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
+///
+/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
+/// mappings.
+///
void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueMap<const Value*, Value*> &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix = "",
ClonedCodeInfo *CodeInfo = 0,
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index b2779707276c..caae27f47a44 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -118,8 +118,6 @@ bool EliminateDuplicatePHINodes(BasicBlock *BB);
/// of the CFG. It returns true if a modification was made, possibly deleting
/// the basic block that was pointed to.
///
-/// WARNING: The entry node of a method may not be simplified.
-///
bool SimplifyCFG(BasicBlock *BB, const TargetData *TD = 0);
/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index ca98466b3456..e50a6b15df81 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -36,9 +36,11 @@ private:
//typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
void *AV;
- /// PrototypeValue is an arbitrary representative value, which we derive names
- /// and a type for PHI nodes.
- Value *PrototypeValue;
+ /// ProtoType holds the type of the values being rewritten.
+ const Type *ProtoType;
+
+ // PHI nodes are given a name based on ProtoName.
+ std::string ProtoName;
/// InsertedPHIs - If this is non-null, the SSAUpdater adds all PHI nodes that
/// it creates to the vector.
@@ -51,8 +53,8 @@ public:
~SSAUpdater();
/// Initialize - Reset this object to get ready for a new set of SSA
- /// updates. ProtoValue is the value used to name PHI nodes.
- void Initialize(Value *ProtoValue);
+ /// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+ void Initialize(const Type *Ty, StringRef Name);
/// AddAvailableValue - Indicate that a rewritten value is available at the
/// end of the specified block with the specified value.
@@ -94,6 +96,12 @@ public:
/// for the use's block will be considered to be below it.
void RewriteUse(Use &U);
+ /// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+ /// this version of the method can rewrite uses in the same block as a
+ /// definition, because it assumes that all uses of a value are below any
+ /// inserted values.
+ void RewriteUseAfterInsertions(Use &U);
+
private:
Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
diff --git a/include/llvm/Transforms/Utils/SSI.h b/include/llvm/Transforms/Utils/SSI.h
deleted file mode 100644
index 198fc827bb0b..000000000000
--- a/include/llvm/Transforms/Utils/SSI.h
+++ /dev/null
@@ -1,93 +0,0 @@
-//===------------------- SSI.h - Creates SSI Representation -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts a list of variables to the Static Single Information
-// form. This is a program representation described by Scott Ananian in his
-// Master Thesis: "The Static Single Information Form (1999)".
-// We are building an on-demand representation, that is, we do not convert
-// every single variable in the target function to SSI form. Rather, we receive
-// a list of target variables that must be converted. We also do not
-// completely convert a target variable to the SSI format. Instead, we only
-// change the variable in the points where new information can be attached
-// to its live range, that is, at branch points.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_SSI_H
-#define LLVM_TRANSFORMS_UTILS_SSI_H
-
-#include "llvm/InstrTypes.h"
-#include "llvm/Pass.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-
-namespace llvm {
-
- class DominatorTree;
- class PHINode;
- class Instruction;
- class CmpInst;
-
- class SSI : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
- SSI() :
- FunctionPass(&ID) {
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const;
-
- bool runOnFunction(Function&);
-
- void createSSI(SmallVectorImpl<Instruction *> &value);
-
- private:
- // Variables always live
- DominatorTree *DT_;
-
- // Stores variables created by SSI
- SmallPtrSet<Instruction *, 16> created;
-
- // Phis created by SSI
- DenseMap<PHINode *, Instruction*> phis;
-
- // Sigmas created by SSI
- DenseMap<PHINode *, Instruction*> sigmas;
-
- // Phi nodes that have a phi as operand and has to be fixed
- SmallPtrSet<PHINode *, 1> phisToFix;
-
- // List of definition points for every variable
- DenseMap<Instruction*, SmallVector<BasicBlock*, 4> > defsites;
-
- // Basic Block of the original definition of each variable
- DenseMap<Instruction*, BasicBlock*> value_original;
-
- // Stack of last seen definition of a variable
- DenseMap<Instruction*, SmallVector<Instruction *, 1> > value_stack;
-
- void insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value);
- void insertSigma(TerminatorInst *TI, Instruction *I);
- void insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value);
- void renameInit(SmallPtrSet<Instruction*, 4> &value);
- void rename(BasicBlock *BB);
-
- void substituteUse(Instruction *I);
- bool dominateAny(BasicBlock *BB, Instruction *value);
- void fixPhis();
-
- Instruction* getPositionPhi(PHINode *PN);
- Instruction* getPositionSigma(PHINode *PN);
-
- void init(SmallVectorImpl<Instruction *> &value);
- void clean();
- };
-} // end namespace
-#endif
diff --git a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index c2d09935e76c..a5060e6f5860 100644
--- a/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -26,7 +26,7 @@ struct UnifyFunctionExitNodes : public FunctionPass {
BasicBlock *ReturnBlock, *UnwindBlock, *UnreachableBlock;
public:
static char ID; // Pass identification, replacement for typeid
- UnifyFunctionExitNodes() : FunctionPass(&ID),
+ UnifyFunctionExitNodes() : FunctionPass(ID),
ReturnBlock(0), UnwindBlock(0) {}
// We can preserve non-critical-edgeness when we unify function exit nodes
diff --git a/lib/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index f4ff643ca03e..5274112897b7 100644
--- a/lib/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -1,4 +1,4 @@
-//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===//
+//===- ValueMapper.h - Remapping for constants and metadata -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef VALUEMAPPER_H
-#define VALUEMAPPER_H
+#ifndef LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
+#define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
#include "llvm/ADT/ValueMap.h"
@@ -22,8 +22,10 @@ namespace llvm {
class Instruction;
typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
- Value *MapValue(const Value *V, ValueToValueMapTy &VM);
- void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
+ Value *MapValue(const Value *V, ValueToValueMapTy &VM,
+ bool ModuleLevelChanges);
+ void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
+ bool ModuleLevelChanges);
} // End llvm namespace
#endif
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 617ef69de467..f7d6fd57a23c 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -82,11 +82,10 @@ public:
IntegerTyID, ///< 8: Arbitrary bit width integers
FunctionTyID, ///< 9: Functions
StructTyID, ///< 10: Structures
- UnionTyID, ///< 11: Unions
- ArrayTyID, ///< 12: Arrays
- PointerTyID, ///< 13: Pointers
- OpaqueTyID, ///< 14: Opaque: type with unknown structure
- VectorTyID, ///< 15: SIMD 'packed' format, or other vector type
+ ArrayTyID, ///< 11: Arrays
+ PointerTyID, ///< 12: Pointers
+ OpaqueTyID, ///< 13: Opaque: type with unknown structure
+ VectorTyID, ///< 14: SIMD 'packed' format, or other vector type
NumTypeIDs, // Must remain as last defined ID
LastPrimitiveTyID = MetadataTyID,
@@ -243,10 +242,6 @@ public:
///
bool isStructTy() const { return ID == StructTyID; }
- /// isUnionTy - True if this is an instance of UnionType.
- ///
- bool isUnionTy() const { return ID == UnionTyID; }
-
/// isArrayTy - True if this is an instance of ArrayType.
///
bool isArrayTy() const { return ID == ArrayTyID; }
@@ -306,7 +301,7 @@ public:
/// does not include vector types.
///
inline bool isAggregateType() const {
- return ID == StructTyID || ID == ArrayTyID || ID == UnionTyID;
+ return ID == StructTyID || ID == ArrayTyID;
}
/// isSized - Return true if it makes sense to take the size of this type. To
@@ -319,8 +314,7 @@ public:
return true;
// If it is not something that can have a size (e.g. a function or label),
// it doesn't have a size.
- if (ID != StructTyID && ID != ArrayTyID && ID != VectorTyID &&
- ID != UnionTyID)
+ if (ID != StructTyID && ID != ArrayTyID && ID != VectorTyID)
return false;
// If it is something that can have a size and it's concrete, it definitely
// has a size, otherwise we have to try harder to decide.
diff --git a/include/llvm/Use.h b/include/llvm/Use.h
index 2759338f42aa..e1ebc6a51be5 100644
--- a/include/llvm/Use.h
+++ b/include/llvm/Use.h
@@ -210,30 +210,6 @@ public:
unsigned getOperandNo() const;
};
-
-template<> struct simplify_type<value_use_iterator<User> > {
- typedef User* SimpleType;
-
- static SimpleType getSimplifiedValue(const value_use_iterator<User> &Val) {
- return *Val;
- }
-};
-
-template<> struct simplify_type<const value_use_iterator<User> >
- : public simplify_type<value_use_iterator<User> > {};
-
-template<> struct simplify_type<value_use_iterator<const User> > {
- typedef const User* SimpleType;
-
- static SimpleType getSimplifiedValue(const
- value_use_iterator<const User> &Val) {
- return *Val;
- }
-};
-
-template<> struct simplify_type<const value_use_iterator<const User> >
- : public simplify_type<value_use_iterator<const User> > {};
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index cfb4422a43b7..8740f353ab51 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -215,12 +215,10 @@ public:
ConstantFPVal, // This is an instance of ConstantFP
ConstantArrayVal, // This is an instance of ConstantArray
ConstantStructVal, // This is an instance of ConstantStruct
- ConstantUnionVal, // This is an instance of ConstantUnion
ConstantVectorVal, // This is an instance of ConstantVector
ConstantPointerNullVal, // This is an instance of ConstantPointerNull
MDNodeVal, // This is an instance of MDNode
MDStringVal, // This is an instance of MDString
- NamedMDNodeVal, // This is an instance of NamedMDNode
InlineAsmVal, // This is an instance of InlineAsm
PseudoSourceValueVal, // This is an instance of PseudoSourceValue
FixedStackPseudoSourceValueVal, // This is an instance of
@@ -308,6 +306,10 @@ public:
return const_cast<Value*>(this)->DoPHITranslation(CurBB, PredBB);
}
+ /// MaximumAlignment - This is the greatest alignment value supported by
+ /// load, store, and alloca instructions, and global values.
+ static const unsigned MaximumAlignment = 1u << 29;
+
protected:
unsigned short getSubclassDataFromValue() const { return SubclassData; }
void setValueSubclassData(unsigned short D) { SubclassData = D; }
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
index 7497dae3c469..35fc97b2d3ce 100644
--- a/include/llvm/ValueSymbolTable.h
+++ b/include/llvm/ValueSymbolTable.h
@@ -128,94 +128,6 @@ private:
/// @}
};
-/// This class provides a symbol table of name/NamedMDNode pairs. It is
-/// essentially a StringMap wrapper.
-
-class MDSymbolTable {
- friend class SymbolTableListTraits<NamedMDNode, Module>;
-/// @name Types
-/// @{
-private:
- /// @brief A mapping of names to metadata
- typedef StringMap<NamedMDNode*> MDMap;
-
-public:
- /// @brief An iterator over a ValueMap.
- typedef MDMap::iterator iterator;
-
- /// @brief A const_iterator over a ValueMap.
- typedef MDMap::const_iterator const_iterator;
-
-/// @}
-/// @name Constructors
-/// @{
-public:
-
- MDSymbolTable(const MDNode &); // DO NOT IMPLEMENT
- void operator=(const MDSymbolTable &); // DO NOT IMPLEMENT
- MDSymbolTable() : mmap(0) {}
- ~MDSymbolTable();
-
-/// @}
-/// @name Accessors
-/// @{
-public:
-
- /// This method finds the value with the given \p Name in the
- /// the symbol table.
- /// @returns the NamedMDNode associated with the \p Name
- /// @brief Lookup a named Value.
- NamedMDNode *lookup(StringRef Name) const { return mmap.lookup(Name); }
-
- /// @returns true iff the symbol table is empty
- /// @brief Determine if the symbol table is empty
- inline bool empty() const { return mmap.empty(); }
-
- /// @brief The number of name/type pairs is returned.
- inline unsigned size() const { return unsigned(mmap.size()); }
-
-/// @}
-/// @name Iteration
-/// @{
-public:
- /// @brief Get an iterator that from the beginning of the symbol table.
- inline iterator begin() { return mmap.begin(); }
-
- /// @brief Get a const_iterator that from the beginning of the symbol table.
- inline const_iterator begin() const { return mmap.begin(); }
-
- /// @brief Get an iterator to the end of the symbol table.
- inline iterator end() { return mmap.end(); }
-
- /// @brief Get a const_iterator to the end of the symbol table.
- inline const_iterator end() const { return mmap.end(); }
-
-/// @}
-/// @name Mutators
-/// @{
-public:
- /// insert - The method inserts a new entry into the stringmap. This will
- /// replace existing entry, if any.
- void insert(StringRef Name, NamedMDNode *Node) {
- StringMapEntry<NamedMDNode *> &Entry =
- mmap.GetOrCreateValue(Name, Node);
- if (Entry.getValue() != Node) {
- mmap.remove(&Entry);
- (void) mmap.GetOrCreateValue(Name, Node);
- }
- }
-
- /// This method removes a NamedMDNode from the symbol table.
- void remove(StringRef Name) { mmap.erase(Name); }
-
-/// @}
-/// @name Internal Data
-/// @{
-private:
- MDMap mmap; ///< The map that holds the symbol table.
-/// @}
-};
-
} // End llvm namespace
#endif
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 503fbbdab8d6..1f2528fa560f 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -65,10 +65,127 @@ void AliasAnalysis::copyValue(Value *From, Value *To) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
- // FIXME: we can do better.
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
+ // Don't assert AA because BasicAA calls us in order to make use of the
+ // logic here.
+
+ ModRefBehavior MRB = getModRefBehavior(CS);
+ if (MRB == DoesNotAccessMemory)
+ return NoModRef;
+
+ ModRefResult Mask = ModRef;
+ if (MRB == OnlyReadsMemory)
+ Mask = Ref;
+ else if (MRB == AliasAnalysis::AccessesArguments) {
+ bool doesAlias = false;
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (!isNoAlias(*AI, ~0U, P, Size)) {
+ doesAlias = true;
+ break;
+ }
+
+ if (!doesAlias)
+ return NoModRef;
+ }
+
+ // If P points to a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & Mod) && pointsToConstantMemory(P))
+ Mask = ModRefResult(Mask & ~Mod);
+
+ // If this is BasicAA, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ // Don't assert AA because BasicAA calls us in order to make use of the
+ // logic here.
+
+ // If CS1 or CS2 are readnone, they don't interact.
+ ModRefBehavior CS1B = getModRefBehavior(CS1);
+ if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+ ModRefBehavior CS2B = getModRefBehavior(CS2);
+ if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+ // If they both only read from memory, there is no dependence.
+ if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
+ return NoModRef;
+
+ AliasAnalysis::ModRefResult Mask = ModRef;
+
+ // If CS1 only reads memory, the only dependence on CS2 can be
+ // from CS1 reading memory written by CS2.
+ if (CS1B == OnlyReadsMemory)
+ Mask = ModRefResult(Mask & Ref);
+
+ // If CS2 only access memory through arguments, accumulate the mod/ref
+ // information from CS1's references to the memory referenced by
+ // CS2's arguments.
+ if (CS2B == AccessesArguments) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ for (ImmutableCallSite::arg_iterator
+ I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+ if (R == Mask)
+ break;
+ }
+ return R;
+ }
+
+ // If CS1 only accesses memory through arguments, check if CS2 references
+ // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+ if (CS1B == AccessesArguments) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ for (ImmutableCallSite::arg_iterator
+ I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
+ if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+ R = Mask;
+ break;
+ }
+ if (R == NoModRef)
+ return R;
+ }
+
+ // If this is BasicAA, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // Don't assert AA because BasicAA calls us in order to make use of the
+ // logic here.
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // Call back into the alias analysis with the other form of getModRefBehavior
+ // to see if it can give a better response.
+ if (const Function *F = CS.getCalledFunction())
+ Min = getModRefBehavior(F);
+
+ // If this is BasicAA, don't forward.
+ if (!AA) return Min;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any result we've managed to compute.
+ return std::min(AA->getModRefBehavior(CS), Min);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(const Function *F) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getModRefInfo(CS1, CS2);
+ return AA->getModRefBehavior(F);
}
@@ -77,87 +194,63 @@ AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
//===----------------------------------------------------------------------===//
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) {
- return alias(L->getOperand(0), getTypeStoreSize(L->getType()),
- P, Size) ? Ref : NoModRef;
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
+ // Be conservative in the face of volatile.
+ if (L->isVolatile())
+ return ModRef;
+
+ // If the load address doesn't alias the given address, it doesn't read
+ // or write the specified memory.
+ if (!alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size))
+ return NoModRef;
+
+ // Otherwise, a load just reads.
+ return Ref;
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) {
- // If the stored address cannot alias the pointer in question, then the
- // pointer cannot be modified by the store.
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size) {
+ // Be conservative in the face of volatile.
+ if (S->isVolatile())
+ return ModRef;
+
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
if (!alias(S->getOperand(1),
getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have been
// modified by this store.
- return pointsToConstantMemory(P) ? NoModRef : Mod;
-}
-
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(CallSite CS,
- std::vector<PointerAccessInfo> *Info) {
- if (CS.doesNotAccessMemory())
- // Can't do better than this.
- return DoesNotAccessMemory;
- ModRefBehavior MRB = getModRefBehavior(CS.getCalledFunction(), Info);
- if (MRB != DoesNotAccessMemory && CS.onlyReadsMemory())
- return OnlyReadsMemory;
- return MRB;
-}
-
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(Function *F,
- std::vector<PointerAccessInfo> *Info) {
- if (F) {
- if (F->doesNotAccessMemory())
- // Can't do better than this.
- return DoesNotAccessMemory;
- if (F->onlyReadsMemory())
- return OnlyReadsMemory;
- if (unsigned id = F->getIntrinsicID())
- return getModRefBehavior(id);
- }
- return UnknownModRefBehavior;
-}
+ if (pointsToConstantMemory(P))
+ return NoModRef;
-AliasAnalysis::ModRefBehavior AliasAnalysis::getModRefBehavior(unsigned iid) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ // Otherwise, a store just writes.
+ return Mod;
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
- ModRefBehavior MRB = getModRefBehavior(CS);
- if (MRB == DoesNotAccessMemory)
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Value *P, unsigned Size) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(V->getOperand(0), UnknownSize, P, Size))
return NoModRef;
-
- ModRefResult Mask = ModRef;
- if (MRB == OnlyReadsMemory)
- Mask = Ref;
- else if (MRB == AliasAnalysis::AccessesArguments) {
- bool doesAlias = false;
- for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI)
- if (!isNoAlias(*AI, ~0U, P, Size)) {
- doesAlias = true;
- break;
- }
- if (!doesAlias)
- return NoModRef;
- }
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this va_arg.
+ if (pointsToConstantMemory(P))
+ return NoModRef;
- if (!AA) return Mask;
+ // Otherwise, a va_arg reads and writes.
+ return ModRef;
+}
- // If P points to a constant memory location, the call definitely could not
- // modify the memory location.
- if ((Mask & Mod) && AA->pointsToConstantMemory(P))
- Mask = ModRefResult(Mask & ~Mod);
- return ModRefResult(Mask & AA->getModRefInfo(CS, P, Size));
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getIntrinsicModRefBehavior(unsigned iid) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
}
// AliasAnalysis destructor: DO NOT move this to the header file for
@@ -206,12 +299,12 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
const Value *Ptr, unsigned Size) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
- BasicBlock::iterator I = const_cast<Instruction*>(&I1);
- BasicBlock::iterator E = const_cast<Instruction*>(&I2);
+ BasicBlock::const_iterator I = &I1;
+ BasicBlock::const_iterator E = &I2;
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(I, const_cast<Value*>(Ptr), Size) & Mod)
+ if (getModRefInfo(I, Ptr, Size) & Mod)
return true;
return false;
}
@@ -220,7 +313,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
/// function.
bool llvm::isNoAliasCall(const Value *V) {
if (isa<CallInst>(V) || isa<InvokeInst>(V))
- return CallSite(const_cast<Instruction*>(cast<Instruction>(V)))
+ return ImmutableCallSite(cast<Instruction>(V))
.paramHasAttr(0, Attribute::NoAlias);
return false;
}
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 1053955ea233..b17804186a63 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -34,7 +34,7 @@ namespace {
Module *M;
public:
static char ID; // Class identification, replacement for typeinfo
- AliasAnalysisCounter() : ModulePass(&ID) {
+ AliasAnalysisCounter() : ModulePass(ID) {
No = May = Must = 0;
NoMR = JustRef = JustMod = MR = 0;
}
@@ -87,8 +87,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -103,17 +103,18 @@ namespace {
AliasResult alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
};
}
char AliasAnalysisCounter::ID = 0;
-static RegisterPass<AliasAnalysisCounter>
-X("count-aa", "Count Alias Analysis Query Responses", false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
+ "Count Alias Analysis Query Responses", false, true, false);
ModulePass *llvm::createAliasAnalysisCounterPass() {
return new AliasAnalysisCounter();
@@ -146,7 +147,8 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
}
AliasAnalysis::ModRefResult
-AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size);
const char *MRString;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 37ee9fc22c9b..ce363cbc7bbd 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -50,7 +50,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- AAEval() : FunctionPass(&ID) {}
+ AAEval() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
@@ -74,8 +74,8 @@ namespace {
}
char AAEval::ID = 0;
-static RegisterPass<AAEval>
-X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true);
+INITIALIZE_PASS(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true);
FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
@@ -107,6 +107,15 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
}
}
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *CSA.getInstruction()
+ << " <-> " << *CSB.getInstruction() << '\n';
+ }
+}
+
static inline bool isInterestingPointer(Value *V) {
return V->getType()->isPointerTy()
&& !isa<ConstantPointerNull>(V);
@@ -126,8 +135,7 @@ bool AAEval::runOnFunction(Function &F) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
Pointers.insert(&*I);
Instruction &Inst = *I;
- CallSite CS = CallSite::get(&Inst);
- if (CS) {
+ if (CallSite CS = cast<Value>(&Inst)) {
Value *Callee = CS.getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
@@ -137,6 +145,7 @@ bool AAEval::runOnFunction(Function &F) {
AI != AE; ++AI)
if (isInterestingPointer(*AI))
Pointers.insert(*AI);
+ CallSites.insert(CS);
} else {
// Consider all operands.
for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
@@ -144,8 +153,6 @@ bool AAEval::runOnFunction(Function &F) {
if (isInterestingPointer(*OI))
Pointers.insert(*OI);
}
-
- if (CS.getInstruction()) CallSites.insert(CS);
}
if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
@@ -197,13 +204,13 @@ bool AAEval::runOnFunction(Function &F) {
PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
++NoModRef; break;
case AliasAnalysis::Mod:
- PrintModRefResults(" Mod", PrintMod, I, *V, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
++Mod; break;
case AliasAnalysis::Ref:
- PrintModRefResults(" Ref", PrintRef, I, *V, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
++Ref; break;
case AliasAnalysis::ModRef:
- PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent());
+ PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
++ModRef; break;
default:
errs() << "Unknown alias query result!\n";
@@ -211,6 +218,29 @@ bool AAEval::runOnFunction(Function &F) {
}
}
+ // Mod/ref alias analysis: compare all pairs of calls
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ if (D == C)
+ continue;
+ switch (AA.getModRefInfo(*C, *D)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ ++ModRef; break;
+ }
+ }
+ }
+
return false;
}
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index bc2d9c55d183..b9fe64608c01 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -39,7 +39,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- AliasDebugger() : ModulePass(&ID) {}
+ AliasDebugger() : ModulePass(ID) {}
bool runOnModule(Module &M) {
InitializeAliasAnalysis(this); // set up super class
@@ -83,8 +83,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -99,12 +99,14 @@ namespace {
return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
}
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
return AliasAnalysis::getModRefInfo(CS, P, Size);
}
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
@@ -126,9 +128,8 @@ namespace {
}
char AliasDebugger::ID = 0;
-static RegisterPass<AliasDebugger>
-X("debug-aa", "AA use debugger", false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
+ "AA use debugger", false, true, false);
Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 02aff50d8a13..e74543bb508a 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -22,7 +22,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -35,6 +34,7 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
// Update the alias and access types of this set...
AccessTy |= AS.AccessTy;
AliasTy |= AS.AliasTy;
+ Volatile |= AS.Volatile;
if (AliasTy == MustAlias) {
// Check that these two merged sets really are must aliases. Since both
@@ -111,11 +111,11 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
*PtrListEnd = &Entry;
PtrListEnd = Entry.setPrevInList(PtrListEnd);
assert(*PtrListEnd == 0 && "End of list is not null?");
- addRef(); // Entry points to alias set...
+ addRef(); // Entry points to alias set.
}
void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
- CallSites.push_back(CS);
+ CallSites.push_back(CS.getInstruction());
AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
@@ -140,7 +140,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
assert(CallSites.empty() && "Illegal must alias set!");
// If this is a set of MustAliases, only check to see if the pointer aliases
- // SOME value in the set...
+ // SOME value in the set.
PointerRec *SomePtr = getSomePointer();
assert(SomePtr && "Empty must-alias set??");
return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size);
@@ -155,8 +155,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
// Check the call sites list and invoke list...
if (!CallSites.empty()) {
for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
- if (AA.getModRefInfo(CallSites[i], const_cast<Value*>(Ptr), Size)
- != AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(CallSites[i], Ptr, Size) != AliasAnalysis::NoModRef)
return true;
}
@@ -167,10 +166,11 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
if (AA.doesNotAccessMemory(CS))
return false;
- for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
- if (AA.getModRefInfo(CallSites[i], CS) != AliasAnalysis::NoModRef ||
- AA.getModRefInfo(CS, CallSites[i]) != AliasAnalysis::NoModRef)
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
return true;
+ }
for (iterator I = begin(), E = end(); I != E; ++I)
if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
@@ -200,14 +200,15 @@ void AliasSetTracker::clear() {
AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
unsigned Size) {
AliasSet *FoundSet = 0;
- for (iterator I = begin(), E = end(); I != E; ++I)
- if (!I->Forward && I->aliasesPointer(Ptr, Size, AA)) {
- if (FoundSet == 0) { // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
- } else { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
- }
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesPointer(Ptr, Size, AA)) continue;
+
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
}
+ }
return FoundSet;
}
@@ -226,15 +227,15 @@ bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const {
AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
AliasSet *FoundSet = 0;
- for (iterator I = begin(), E = end(); I != E; ++I)
- if (!I->Forward && I->aliasesCallSite(CS, AA)) {
- if (FoundSet == 0) { // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
- } else if (!I->Forward) { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
- }
- }
-
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesCallSite(CS, AA))
+ continue;
+
+ if (FoundSet == 0) // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ else if (!I->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
return FoundSet;
}
@@ -247,22 +248,24 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size,
bool *New) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
- // Check to see if the pointer is already known...
+ // Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
Entry.updateSize(Size);
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
- } else if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
- // Add it to the alias set it aliases...
+ }
+
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
+ // Add it to the alias set it aliases.
AS->addPointer(*this, Entry, Size);
return *AS;
- } else {
- if (New) *New = true;
- // Otherwise create a new alias set to hold the loaded pointer...
- AliasSets.push_back(new AliasSet());
- AliasSets.back().addPointer(*this, Entry, Size);
- return AliasSets.back();
}
+
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer.
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size);
+ return AliasSets.back();
}
bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
@@ -305,28 +308,27 @@ bool AliasSetTracker::add(CallSite CS) {
return true; // doesn't alias anything
AliasSet *AS = findAliasSetForCallSite(CS);
- if (!AS) {
- AliasSets.push_back(new AliasSet());
- AS = &AliasSets.back();
- AS->addCallSite(CS, AA);
- return true;
- } else {
+ if (AS) {
AS->addCallSite(CS, AA);
return false;
}
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addCallSite(CS, AA);
+ return true;
}
bool AliasSetTracker::add(Instruction *I) {
- // Dispatch to one of the other add methods...
+ // Dispatch to one of the other add methods.
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return add(LI);
- else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
return add(SI);
- else if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (CallInst *CI = dyn_cast<CallInst>(I))
return add(CI);
- else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
return add(II);
- else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return add(VAAI);
return true;
}
@@ -343,23 +345,23 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
// Loop over all of the alias sets in AST, adding the pointers contained
// therein into the current alias sets. This can cause alias sets to be
// merged together in the current AST.
- for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I)
- if (!I->Forward) { // Ignore forwarding alias sets
- AliasSet &AS = const_cast<AliasSet&>(*I);
-
- // If there are any call sites in the alias set, add them to this AST.
- for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
- add(AS.CallSites[i]);
-
- // Loop over all of the pointers in this alias set...
- AliasSet::iterator I = AS.begin(), E = AS.end();
- bool X;
- for (; I != E; ++I) {
- AliasSet &NewAS = addPointer(I.getPointer(), I.getSize(),
- (AliasSet::AccessType)AS.AccessTy, X);
- if (AS.isVolatile()) NewAS.setVolatile();
- }
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+ if (I->Forward) continue; // Ignore forwarding alias sets
+
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
+ add(AS.CallSites[i]);
+
+ // Loop over all of the pointers in this alias set.
+ bool X;
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ (AliasSet::AccessType)AS.AccessTy, X);
+ if (AS.isVolatile()) NewAS.setVolatile();
}
+ }
}
/// remove - Remove the specified (potentially non-empty) alias set from the
@@ -435,11 +437,11 @@ bool AliasSetTracker::remove(Instruction *I) {
// Dispatch to one of the other remove methods...
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return remove(LI);
- else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
return remove(SI);
- else if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (CallInst *CI = dyn_cast<CallInst>(I))
return remove(CI);
- else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return remove(VAAI);
return true;
}
@@ -455,12 +457,17 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
AA.deleteValue(PtrVal);
// If this is a call instruction, remove the callsite from the appropriate
- // AliasSet.
- CallSite CS = CallSite::get(PtrVal);
- if (CS.getInstruction())
- if (!AA.doesNotAccessMemory(CS))
- if (AliasSet *AS = findAliasSetForCallSite(CS))
- AS->removeCallSite(CS);
+ // AliasSet (if present).
+ if (CallSite CS = PtrVal) {
+ if (!AA.doesNotAccessMemory(CS)) {
+ // Scan all the alias sets to see if this call site is contained.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward) continue;
+
+ I->removeCallSite(CS);
+ }
+ }
+ }
// First, look up the PointerRec for this pointer.
PointerMapType::iterator I = PointerMap.find(PtrVal);
@@ -510,7 +517,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
//===----------------------------------------------------------------------===//
void AliasSet::print(raw_ostream &OS) const {
- OS << " AliasSet[" << format("0x%p", (void*)this) << "," << RefCount << "] ";
+ OS << " AliasSet[" << (void*)this << ", " << RefCount << "] ";
OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
switch (AccessTy) {
case NoModRef: OS << "No access "; break;
@@ -536,7 +543,7 @@ void AliasSet::print(raw_ostream &OS) const {
OS << "\n " << CallSites.size() << " Call Sites: ";
for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
if (i) OS << ", ";
- WriteAsOperand(OS, CallSites[i].getCalledValue());
+ WriteAsOperand(OS, CallSites[i]);
}
}
OS << "\n";
@@ -580,7 +587,7 @@ namespace {
AliasSetTracker *Tracker;
public:
static char ID; // Pass identification, replacement for typeid
- AliasSetPrinter() : FunctionPass(&ID) {}
+ AliasSetPrinter() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -600,5 +607,5 @@ namespace {
}
char AliasSetPrinter::ID = 0;
-static RegisterPass<AliasSetPrinter>
-X("print-alias-sets", "Alias Set Printer", false, true);
+INITIALIZE_PASS(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 4f53a6d62559..113c72b94dac 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -18,6 +18,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
@@ -30,6 +31,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include <algorithm>
using namespace llvm;
@@ -137,8 +139,8 @@ namespace {
///
struct NoAA : public ImmutablePass, public AliasAnalysis {
static char ID; // Class identification, replacement for typeinfo
- NoAA() : ImmutablePass(&ID) {}
- explicit NoAA(void *PID) : ImmutablePass(PID) { }
+ NoAA() : ImmutablePass(ID) {}
+ explicit NoAA(char &PID) : ImmutablePass(PID) { }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
@@ -152,16 +154,20 @@ namespace {
return MayAlias;
}
- virtual void getArgumentAccesses(Function *F, CallSite CS,
- std::vector<PointerAccessInfo> &Info) {
- llvm_unreachable("This method may not be called on this function!");
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return UnknownModRefBehavior;
+ }
+ virtual ModRefBehavior getModRefBehavior(const Function *F) {
+ return UnknownModRefBehavior;
}
virtual bool pointsToConstantMemory(const Value *P) { return false; }
- virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
return ModRef;
}
- virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
return ModRef;
}
@@ -169,11 +175,11 @@ namespace {
virtual void copyValue(Value *From, Value *To) {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -182,15 +188,279 @@ namespace {
// Register this pass...
char NoAA::ID = 0;
-static RegisterPass<NoAA>
-U("no-aa", "No Alias Analysis (always returns 'may' alias)", true, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis> V(U);
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+ "No Alias Analysis (always returns 'may' alias)",
+ true, true, false);
ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
//===----------------------------------------------------------------------===//
+// GetElementPtr Instruction Decomposition and Analysis
+//===----------------------------------------------------------------------===//
+
+namespace {
+ enum ExtensionKind {
+ EK_NotExtended,
+ EK_SignExt,
+ EK_ZeroExt
+ };
+
+ struct VariableGEPIndex {
+ const Value *V;
+ ExtensionKind Extension;
+ int64_t Scale;
+ };
+}
+
+
+/// GetLinearExpression - Analyze the specified value as a linear expression:
+/// "A*V + B", where A and B are constant integers. Return the scale and offset
+/// values as APInts and return V as a Value*, and return whether we looked
+/// through any sign or zero extends. The incoming Value is known to have
+/// IntegerType and it may already be sign or zero extended.
+///
+/// Note that this looks through extends, so the high bits may not be
+/// represented in the result.
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
+ ExtensionKind &Extension,
+ const TargetData &TD, unsigned Depth) {
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ switch (BOp->getOpcode()) {
+ default: break;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
+ break;
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset += RHSC->getValue();
+ return V;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset *= RHSC->getValue();
+ Scale *= RHSC->getValue();
+ return V;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset <<= RHSC->getValue().getLimitedValue();
+ Scale <<= RHSC->getValue().getLimitedValue();
+ return V;
+ }
+ }
+ }
+
+ // Since GEP indices are sign extended anyway, we don't care about the high
+ // bits of a sign or zero extended value - just scales and offsets. The
+ // extensions have to be consistent though.
+ if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
+ (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned OldWidth = Scale.getBitWidth();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ Scale.trunc(SmallWidth);
+ Offset.trunc(SmallWidth);
+ Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
+ TD, Depth+1);
+ Scale.zext(OldWidth);
+ Offset.zext(OldWidth);
+
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
+
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When TargetData is around, this function is capable of analyzing everything
+/// that Value::getUnderlyingObject() can look through. When not, it just looks
+/// through pointer casts.
+///
+static const Value *
+DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ const TargetData *TD) {
+ // Limit recursion depth to limit compile time in crazy cases.
+ unsigned MaxLookup = 6;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (Op == 0) {
+ // The only non-operator case we can handle are GlobalAliases.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (GEPOp == 0)
+ return V;
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return V;
+
+ // If we are lacking TargetData information, we can't compute the offets of
+ // elements computed by GEPs. However, we can handle bitcast equivalent
+ // GEPs.
+ if (TD == 0) {
+ if (!GEPOp->hasAllZeroIndices())
+ return V;
+ V = GEPOp->getOperand(0);
+ continue;
+ }
+
+ // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin()+1,
+ E = GEPOp->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0) continue;
+
+ BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero()) continue;
+ BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = TD->getTypeAllocSize(*GTI);
+ ExtensionKind Extension = EK_NotExtended;
+
+ // If the integer type is smaller than the pointer size, it is implicitly
+ // sign extended to pointer size.
+ unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+ if (TD->getPointerSizeInBits() > Width)
+ Extension = EK_SignExt;
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
+ *TD, 0);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getZExtValue()*Scale;
+ Scale *= IndexScale.getZExtValue();
+
+
+ // If we already had an occurrance of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].V == Index &&
+ VarIndices[i].Extension == Extension) {
+ Scale += VarIndices[i].Scale;
+ VarIndices.erase(VarIndices.begin()+i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ Scale <<= ShiftBits;
+ Scale >>= ShiftBits;
+ }
+
+ if (Scale) {
+ VariableGEPIndex Entry = {Index, Extension, Scale};
+ VarIndices.push_back(Entry);
+ }
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ return V;
+}
+
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty()) return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+ // than a few variable indexes.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin()+j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
@@ -220,10 +490,10 @@ namespace {
/// derives from the NoAA class.
struct BasicAliasAnalysis : public NoAA {
static char ID; // Class identification, replacement for typeinfo
- BasicAliasAnalysis() : NoAA(&ID) {}
+ BasicAliasAnalysis() : NoAA(ID) {}
- AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
+ virtual AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
assert(Visited.empty() && "Visited must be cleared after use!");
assert(notDifferentParent(V1, V2) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
@@ -232,19 +502,33 @@ namespace {
return Alias;
}
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
/// pointsToConstantMemory - Chase pointers until we find a (constant
/// global) or not.
- bool pointsToConstantMemory(const Value *P);
+ virtual bool pointsToConstantMemory(const Value *P);
+
+ /// getModRefBehavior - Return the behavior when calling the given
+ /// call site.
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+ /// getModRefBehavior - Return the behavior when calling the given function.
+ /// For use when the call site is not known.
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
/// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -275,11 +559,9 @@ namespace {
// Register this pass...
char BasicAliasAnalysis::ID = 0;
-static RegisterPass<BasicAliasAnalysis>
-X("basicaa", "Basic Alias Analysis (default AA impl)", false, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis, true> Y(X);
+INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (default AA impl)",
+ false, true, true);
ImmutablePass *llvm::createBasicAliasAnalysisPass() {
return new BasicAliasAnalysis();
@@ -295,16 +577,50 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
// global to be marked constant in some modules and non-constant in others.
// GV may even be a declaration, not a definition.
return GV->isConstant();
- return false;
+
+ return NoAA::pointsToConstantMemory(P);
}
+/// getModRefBehavior - Return the behavior when calling the given call site.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the callsite knows it only reads memory, don't return worse
+ // than that.
+ if (CS.onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+ // The AliasAnalysis base class has some smarts, lets use them.
+ return std::min(AliasAnalysis::getModRefBehavior(CS), Min);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given function.
+/// For use when the call site is not known.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+ if (F->doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+ if (F->onlyReadsMemory())
+ return OnlyReadsMemory;
+ if (unsigned id = F->getIntrinsicID())
+ return getIntrinsicModRefBehavior(id);
+
+ return NoAA::getModRefBehavior(F);
+}
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
assert(notDifferentParent(CS.getInstruction(), P) &&
"AliasAnalysis query involving multiple functions!");
@@ -316,7 +632,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
// the current function not to the current function, and a tail callee
// may reference them.
if (isa<AllocaInst>(Object))
- if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
return NoModRef;
@@ -327,7 +643,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
isNonEscapingLocalObject(Object)) {
bool PassedAsArg = false;
unsigned ArgNo = 0;
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture pointer arguments.
if (!(*CI)->getType()->isPointerTy() ||
@@ -338,7 +654,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(cast<Value>(CI), ~0U, P, ~0U)) {
+ if (!isNoAlias(cast<Value>(CI), UnknownSize, P, UnknownSize)) {
PassedAsArg = true;
break;
}
@@ -349,127 +665,76 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
}
// Finally, handle specific knowledge of intrinsics.
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
- if (II == 0)
- return AliasAnalysis::getModRefInfo(CS, P, Size);
-
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::memcpy:
- case Intrinsic::memmove: {
- unsigned Len = ~0U;
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
- Len = LenCI->getZExtValue();
- Value *Dest = II->getArgOperand(0);
- Value *Src = II->getArgOperand(1);
- if (isNoAlias(Dest, Len, P, Size)) {
- if (isNoAlias(Src, Len, P, Size))
- return NoModRef;
- return Ref;
- }
- break;
- }
- case Intrinsic::memset:
- // Since memset is 'accesses arguments' only, the AliasAnalysis base class
- // will handle it for the variable length case.
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- unsigned Len = LenCI->getZExtValue();
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II != 0)
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ unsigned Len = UnknownSize;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ Len = LenCI->getZExtValue();
Value *Dest = II->getArgOperand(0);
- if (isNoAlias(Dest, Len, P, Size))
+ Value *Src = II->getArgOperand(1);
+ if (isNoAlias(Dest, Len, P, Size)) {
+ if (isNoAlias(Src, Len, P, Size))
+ return NoModRef;
+ return Ref;
+ }
+ break;
+ }
+ case Intrinsic::memset:
+ // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+ // will handle it for the variable length case.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ unsigned Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ if (isNoAlias(Dest, Len, P, Size))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (TD) {
+ Value *Op1 = II->getArgOperand(0);
+ unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
+ if (isNoAlias(Op1, Op1Size, P, Size))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ unsigned PtrSize =
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
return NoModRef;
+ break;
}
- break;
- case Intrinsic::atomic_cmp_swap:
- case Intrinsic::atomic_swap:
- case Intrinsic::atomic_load_add:
- case Intrinsic::atomic_load_sub:
- case Intrinsic::atomic_load_and:
- case Intrinsic::atomic_load_nand:
- case Intrinsic::atomic_load_or:
- case Intrinsic::atomic_load_xor:
- case Intrinsic::atomic_load_max:
- case Intrinsic::atomic_load_min:
- case Intrinsic::atomic_load_umax:
- case Intrinsic::atomic_load_umin:
- if (TD) {
- Value *Op1 = II->getArgOperand(0);
- unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
- if (isNoAlias(Op1, Op1Size, P, Size))
+ case Intrinsic::invariant_end: {
+ unsigned PtrSize =
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
return NoModRef;
+ break;
+ }
}
- break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: {
- unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
- return NoModRef;
- break;
- }
- case Intrinsic::invariant_end: {
- unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
- if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
- return NoModRef;
- break;
- }
- }
// The AliasAnalysis base class has some smarts, lets use them.
return AliasAnalysis::getModRefInfo(CS, P, Size);
}
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
- // If CS1 or CS2 are readnone, they don't interact.
- ModRefBehavior CS1B = AliasAnalysis::getModRefBehavior(CS1);
- if (CS1B == DoesNotAccessMemory) return NoModRef;
-
- ModRefBehavior CS2B = AliasAnalysis::getModRefBehavior(CS2);
- if (CS2B == DoesNotAccessMemory) return NoModRef;
-
- // If they both only read from memory, just return ref.
- if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
- return Ref;
-
- // Otherwise, fall back to NoAA (mod+ref).
- return NoAA::getModRefInfo(CS1, CS2);
-}
-
-/// GetIndiceDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-static void GetIndiceDifference(
- SmallVectorImpl<std::pair<const Value*, int64_t> > &Dest,
- const SmallVectorImpl<std::pair<const Value*, int64_t> > &Src) {
- if (Src.empty()) return;
-
- for (unsigned i = 0, e = Src.size(); i != e; ++i) {
- const Value *V = Src[i].first;
- int64_t Scale = Src[i].second;
-
- // Find V in Dest. This is N^2, but pointer indices almost never have more
- // than a few variable indexes.
- for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
- if (Dest[j].first != V) continue;
-
- // If we found it, subtract off Scale V's from the entry in Dest. If it
- // goes to zero, remove the entry.
- if (Dest[j].second != Scale)
- Dest[j].second -= Scale;
- else
- Dest.erase(Dest.begin()+j);
- Scale = 0;
- break;
- }
-
- // If we didn't consume this entry, add it to the end of the Dest list.
- if (Scale)
- Dest.push_back(std::make_pair(V, -Scale));
- }
-}
-
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(),
@@ -488,13 +753,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
return MayAlias;
int64_t GEP1BaseOffset;
- SmallVector<std::pair<const Value*, int64_t>, 4> GEP1VariableIndices;
+ SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
// If we have two gep instructions with must-alias'ing base pointers, figure
// out if the indexes to the GEP tell us anything about the derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, ~0U, UnderlyingV2, ~0U);
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize,
+ UnderlyingV2, UnknownSize);
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
@@ -507,7 +773,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
int64_t GEP2BaseOffset;
- SmallVector<std::pair<const Value*, int64_t>, 4> GEP2VariableIndices;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
@@ -523,7 +789,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
GEP1BaseOffset -= GEP2BaseOffset;
- GetIndiceDifference(GEP1VariableIndices, GEP2VariableIndices);
+ GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
} else {
// Check to see if these two pointers are related by the getelementptr
@@ -531,10 +797,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
- if (V1Size == ~0U && V2Size == ~0U)
+ if (V1Size == UnknownSize && V2Size == UnknownSize)
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, ~0U, V2, V2Size);
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, V2, V2Size);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -578,8 +844,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// provides an offset of 4 bytes (assuming a <= 4 byte access).
for (unsigned i = 0, e = GEP1VariableIndices.size();
i != e && GEP1BaseOffset;++i)
- if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].second)
- GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].second;
+ if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
+ GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
// If our known offset is bigger than the access size, we know we don't have
// an alias.
@@ -782,8 +1048,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
if (TD)
- if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, *TD)) ||
- (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD)))
+ if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
return NoAlias;
// FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
@@ -810,7 +1076,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1))
return aliasSelect(S1, V1Size, V2, V2Size);
- return MayAlias;
+ return NoAA::alias(V1, V1Size, V2, V2Size);
}
// Make sure that anything that uses AliasAnalysis pulls in this file.
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index e06704bd897c..617a362062fc 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
namespace {
struct CFGViewer : public FunctionPass {
static char ID; // Pass identifcation, replacement for typeid
- CFGViewer() : FunctionPass(&ID) {}
+ CFGViewer() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
F.viewCFG();
@@ -41,13 +41,12 @@ namespace {
}
char CFGViewer::ID = 0;
-static RegisterPass<CFGViewer>
-V0("view-cfg", "View CFG of function", false, true);
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true);
namespace {
struct CFGOnlyViewer : public FunctionPass {
static char ID; // Pass identifcation, replacement for typeid
- CFGOnlyViewer() : FunctionPass(&ID) {}
+ CFGOnlyViewer() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
F.viewCFGOnly();
@@ -63,15 +62,14 @@ namespace {
}
char CFGOnlyViewer::ID = 0;
-static RegisterPass<CFGOnlyViewer>
-V1("view-cfg-only",
- "View CFG of function (with no function bodies)", false, true);
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true);
namespace {
struct CFGPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGPrinter() : FunctionPass(&ID) {}
- explicit CFGPrinter(void *pid) : FunctionPass(pid) {}
+ CFGPrinter() : FunctionPass(ID) {}
+ explicit CFGPrinter(char &pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
std::string Filename = "cfg." + F.getNameStr() + ".dot";
@@ -97,14 +95,14 @@ namespace {
}
char CFGPrinter::ID = 0;
-static RegisterPass<CFGPrinter>
-P1("dot-cfg", "Print CFG of function to 'dot' file", false, true);
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+ false, true);
namespace {
struct CFGOnlyPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyPrinter() : FunctionPass(&ID) {}
- explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
+ CFGOnlyPrinter() : FunctionPass(ID) {}
+ explicit CFGOnlyPrinter(char &pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
std::string Filename = "cfg." + F.getNameStr() + ".dot";
errs() << "Writing '" << Filename << "'...";
@@ -128,9 +126,9 @@ namespace {
}
char CFGOnlyPrinter::ID = 0;
-static RegisterPass<CFGOnlyPrinter>
-P2("dot-cfg-only",
- "Print CFG of function to 'dot' file (with no function bodies)", false, true);
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)",
+ false, true);
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d9b670dea58d..6a2ab681d1ac 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -38,12 +38,15 @@ add_llvm_library(LLVMAnalysis
ProfileInfoLoader.cpp
ProfileInfoLoaderPass.cpp
ProfileVerifierPass.cpp
+ RegionInfo.cpp
+ RegionPrinter.cpp
ScalarEvolution.cpp
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
ScalarEvolutionNormalization.cpp
SparsePropagation.cpp
Trace.cpp
+ TypeBasedAliasAnalysis.cpp
ValueTracking.cpp
)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 047825884ef3..90eae20858fb 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -69,7 +69,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
switch (I->getOpcode()) {
case Instruction::Call:
case Instruction::Invoke: {
- CallSite CS = CallSite::get(I);
+ CallSite CS(I);
// Not captured if the callee is readonly, doesn't return a copy through
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 13d8f4de4824..0bf7967e83b1 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -778,9 +778,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ICmp:
case Instruction::FCmp: assert(0 && "Invalid for compares");
case Instruction::Call:
- if (Function *F = dyn_cast<Function>(Ops[CallInst::ArgOffset ? 0:NumOps-1]))
+ if (Function *F = dyn_cast<Function>(Ops[NumOps - 1]))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops+CallInst::ArgOffset, NumOps-1);
+ return ConstantFoldCall(F, Ops, NumOps - 1);
return 0;
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 3532b052dc55..056775060610 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -40,7 +40,7 @@ namespace {
void printVariableDeclaration(const Value *V);
public:
static char ID; // Pass identification
- PrintDbgInfo() : FunctionPass(&ID), Out(outs()) {}
+ PrintDbgInfo() : FunctionPass(ID), Out(errs()) {}
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -48,8 +48,8 @@ namespace {
}
};
char PrintDbgInfo::ID = 0;
- static RegisterPass<PrintDbgInfo> X("print-dbginfo",
- "Print debug info in human readable form");
+ INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+ "Print debug info in human readable form", false, false);
}
FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index c8d0d22ec2e1..5ca89c658df6 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetMachine.h" // FIXME: LAYERING VIOLATION!
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Intrinsics.h"
@@ -22,6 +21,8 @@
#include "llvm/Module.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,7 +33,22 @@ using namespace llvm::dwarf;
// DIDescriptor
//===----------------------------------------------------------------------===//
-StringRef
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+StringRef
DIDescriptor::getStringField(unsigned Elt) const {
if (DbgNode == 0)
return StringRef();
@@ -60,7 +76,8 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
return DIDescriptor();
if (Elt < DbgNode->getNumOperands())
- return DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
+ return
+ DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
return DIDescriptor();
}
@@ -73,6 +90,15 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
return 0;
}
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
Function *DIDescriptor::getFunctionField(unsigned Elt) const {
if (DbgNode == 0)
return 0;
@@ -109,6 +135,7 @@ bool DIDescriptor::isDerivedType() const {
case dwarf::DW_TAG_restrict_type:
case dwarf::DW_TAG_member:
case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_friend:
return true;
default:
// CompositeTypes are currently modelled as DerivedTypes.
@@ -161,7 +188,8 @@ bool DIDescriptor::isSubprogram() const {
/// isGlobalVariable - Return true if the specified tag is legal for
/// DIGlobalVariable.
bool DIDescriptor::isGlobalVariable() const {
- return DbgNode && getTag() == dwarf::DW_TAG_variable;
+ return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+ getTag() == dwarf::DW_TAG_constant);
}
/// isGlobal - Return true if the specified tag is legal for DIGlobal.
@@ -233,9 +261,8 @@ unsigned DIArray::getNumElements() const {
}
/// replaceAllUsesWith - Replace all uses of debug info referenced by
-/// this descriptor. After this completes, the current debug info value
-/// is erased.
-void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
if (!DbgNode)
return;
@@ -249,7 +276,7 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
const MDNode *DN = D;
const Value *V = cast_or_null<Value>(DN);
Node->replaceAllUsesWith(const_cast<Value*>(V));
- Node->destroy();
+ MDNode::deleteTemporary(Node);
}
}
@@ -277,6 +304,16 @@ bool DIType::Verify() const {
return true;
}
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+ return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+ return isDerivedType();
+}
+
/// Verify - Verify that a composite type descriptor is well formed.
bool DICompositeType::Verify() const {
if (!DbgNode)
@@ -327,7 +364,7 @@ bool DIGlobalVariable::Verify() const {
if (!Ty.Verify())
return false;
- if (!getGlobal())
+ if (!getGlobal() && !getConstant())
return false;
return true;
@@ -355,7 +392,7 @@ bool DIVariable::Verify() const {
bool DILocation::Verify() const {
if (!DbgNode)
return false;
-
+
return DbgNode->getNumOperands() == 4;
}
@@ -378,7 +415,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
DIType BaseType = getTypeDerivedFrom();
- // If this type is not derived from any type then take conservative
+ // If this type is not derived from any type then take conservative
// approach.
if (!BaseType.isValid())
return getSizeInBits();
@@ -387,17 +424,17 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
else
return BaseType.getSizeInBits();
}
-
+
return getSizeInBits();
}
-/// isInlinedFnArgument - Return trule if this variable provides debugging
+/// isInlinedFnArgument - Return true if this variable provides debugging
/// information for an inlined function arguments.
bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
assert(CurFn && "Invalid function");
if (!getContext().isSubprogram())
return false;
- // This variable is not inlined function argument if its scope
+ // This variable is not inlined function argument if its scope
// does not describe current function.
return !(DISubprogram(getContext()).describes(CurFn));
}
@@ -416,7 +453,7 @@ bool DISubprogram::describes(const Function *F) {
return false;
}
-unsigned DISubprogram::isOptimized() const {
+unsigned DISubprogram::isOptimized() const {
assert (DbgNode && "Invalid subprogram descriptor!");
if (DbgNode->getNumOperands() == 16)
return getUnsignedField(15);
@@ -426,7 +463,7 @@ unsigned DISubprogram::isOptimized() const {
StringRef DIScope::getFilename() const {
if (!DbgNode)
return StringRef();
- if (isLexicalBlock())
+ if (isLexicalBlock())
return DILexicalBlock(DbgNode).getFilename();
if (isSubprogram())
return DISubprogram(DbgNode).getFilename();
@@ -445,7 +482,7 @@ StringRef DIScope::getFilename() const {
StringRef DIScope::getDirectory() const {
if (!DbgNode)
return StringRef();
- if (isLexicalBlock())
+ if (isLexicalBlock())
return DILexicalBlock(DbgNode).getDirectory();
if (isSubprogram())
return DISubprogram(DbgNode).getDirectory();
@@ -899,7 +936,26 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
ContainingType
};
- return DICompositeType(MDNode::get(VMContext, &Elts[0], 13));
+
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
+ // Create a named metadata so that we do not lose this enum info.
+ if (Tag == dwarf::DW_TAG_enumeration_type) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ }
+ return DICompositeType(Node);
+}
+
+
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIFactory::CreateTemporaryType() {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = {
+ GetTagConstant(DW_TAG_base_type)
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+ return DIType(Node);
}
@@ -915,8 +971,8 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
unsigned Flags,
DIType DerivedFrom,
DIArray Elements,
- unsigned RuntimeLang) {
-
+ unsigned RuntimeLang,
+ MDNode *ContainingType) {
Value *Elts[] = {
GetTagConstant(Tag),
Context,
@@ -929,9 +985,16 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
DerivedFrom,
Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
+ ContainingType
};
- return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
+ // Create a named metadata so that we do not lose this enum info.
+ if (Tag == dwarf::DW_TAG_enumeration_type) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ }
+ return DICompositeType(Node);
}
@@ -980,8 +1043,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
}
/// CreateSubprogramDefinition - Create new subprogram descriptor for the
-/// given declaration.
-DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) {
+/// given declaration.
+DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){
if (SPDeclaration.isDefinition())
return DISubprogram(SPDeclaration);
@@ -1046,6 +1109,38 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
return DIGlobalVariable(Node);
}
+/// CreateGlobalVariable - Create a new descriptor for the specified constant.
+DIGlobalVariable
+DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,bool isLocalToUnit,
+ bool isDefinition, llvm::Constant *Val) {
+ Value *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, DisplayName),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ Val
+ };
+
+ Value *const *Vs = &Elts[0];
+ MDNode *Node = MDNode::get(VMContext,Vs, 12);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+
+ return DIGlobalVariable(Node);
+}
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
@@ -1073,10 +1168,10 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
char One = '\1';
if (FName.startswith(StringRef(&One, 1)))
FName = FName.substr(1);
- NamedMDNode *FnLocals = M.getNamedMetadata(Twine("llvm.dbg.lv.", FName));
- if (!FnLocals)
- FnLocals = NamedMDNode::Create(VMContext, Twine("llvm.dbg.lv.", FName),
- NULL, 0, &M);
+
+ SmallString<32> Out;
+ NamedMDNode *FnLocals =
+ M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FName).toStringRef(Out));
FnLocals->addOperand(Node);
}
return DIVariable(Node);
@@ -1089,7 +1184,7 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
const std::string &Name,
DIFile F,
unsigned LineNo,
- DIType Ty,
+ DIType Ty,
SmallVector<Value *, 9> &addr) {
SmallVector<Value *, 9> Elts;
Elts.push_back(GetTagConstant(Tag));
@@ -1107,14 +1202,19 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
/// CreateBlock - This creates a descriptor for a lexical block with the
/// specified parent VMContext.
DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
- unsigned LineNo, unsigned Col) {
+ DIFile F, unsigned LineNo,
+ unsigned Col) {
+ // Defeat MDNode uniqing for lexical blocks.
+ static unsigned int unique_id = 0;
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_lexical_block),
Context,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt32Ty(VMContext), Col)
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
};
- return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 4));
+ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 6));
}
/// CreateNameSpace - This creates new descriptor for a namespace
@@ -1174,7 +1274,7 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
// If this block already has a terminator then insert this intrinsic
// before the terminator.
- if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
return CallInst::Create(DeclareFn, Args, Args+2, "", T);
else
return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);}
@@ -1203,7 +1303,7 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
- Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+ Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
D };
return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
@@ -1221,21 +1321,21 @@ void DebugInfoFinder::processModule(Module &M) {
++BI) {
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
processDeclare(DDI);
-
+
DebugLoc Loc = BI->getDebugLoc();
if (Loc.isUnknown())
continue;
-
+
LLVMContext &Ctx = BI->getContext();
DIDescriptor Scope(Loc.getScope(Ctx));
-
+
if (Scope.isCompileUnit())
addCompileUnit(DICompileUnit(Scope));
else if (Scope.isSubprogram())
processSubprogram(DISubprogram(Scope));
else if (Scope.isLexicalBlock())
processLexicalBlock(DILexicalBlock(Scope));
-
+
if (MDNode *IA = Loc.getInlinedAt(Ctx))
processLocation(DILocation(IA));
}
@@ -1380,7 +1480,7 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) {
return 0;
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIDescriptor DIG(cast_or_null<MDNode>(NMD->getOperand(i)));
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
if (!DIG.isGlobalVariable())
continue;
if (DIGlobalVariable(DIG).getGlobal() == V)
@@ -1393,16 +1493,16 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) {
/// It looks through pointer casts too.
static const DbgDeclareInst *findDbgDeclare(const Value *V) {
V = V->stripPointerCasts();
-
+
if (!isa<Instruction>(V) && !isa<Argument>(V))
return 0;
-
+
const Function *F = NULL;
if (const Instruction *I = dyn_cast<Instruction>(V))
F = I->getParent()->getParent();
else if (const Argument *A = dyn_cast<Argument>(V))
F = A->getParent();
-
+
for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
BI != BE; ++BI)
@@ -1460,10 +1560,10 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
DIDescriptor D(Scope);
if (D.isSubprogram())
return DISubprogram(Scope);
-
+
if (D.isLexicalBlock())
return getDISubprogram(DILexicalBlock(Scope).getContext());
-
+
return DISubprogram();
}
@@ -1471,9 +1571,9 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
DICompositeType llvm::getDICompositeType(DIType T) {
if (T.isCompositeType())
return DICompositeType(T);
-
+
if (T.isDerivedType())
return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
-
+
return DICompositeType();
}
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index d95c3761bee6..9f340942f2cc 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -86,99 +86,100 @@ namespace {
struct DomViewer
: public DOTGraphTraitsViewer<DominatorTree, false> {
static char ID;
- DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", &ID){}
+ DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){}
};
struct DomOnlyViewer
: public DOTGraphTraitsViewer<DominatorTree, true> {
static char ID;
- DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", &ID){}
+ DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){}
};
struct PostDomViewer
: public DOTGraphTraitsViewer<PostDominatorTree, false> {
static char ID;
PostDomViewer() :
- DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", &ID){}
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){}
};
struct PostDomOnlyViewer
: public DOTGraphTraitsViewer<PostDominatorTree, true> {
static char ID;
PostDomOnlyViewer() :
- DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", &ID){}
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){}
};
} // end anonymous namespace
char DomViewer::ID = 0;
-RegisterPass<DomViewer> A("view-dom",
- "View dominance tree of function");
+INITIALIZE_PASS(DomViewer, "view-dom",
+ "View dominance tree of function", false, false);
char DomOnlyViewer::ID = 0;
-RegisterPass<DomOnlyViewer> B("view-dom-only",
- "View dominance tree of function "
- "(with no function bodies)");
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+ "View dominance tree of function (with no function bodies)",
+ false, false);
char PostDomViewer::ID = 0;
-RegisterPass<PostDomViewer> C("view-postdom",
- "View postdominance tree of function");
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+ "View postdominance tree of function", false, false);
char PostDomOnlyViewer::ID = 0;
-RegisterPass<PostDomOnlyViewer> D("view-postdom-only",
- "View postdominance tree of function "
- "(with no function bodies)");
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+ "View postdominance tree of function "
+ "(with no function bodies)",
+ false, false);
namespace {
struct DomPrinter
: public DOTGraphTraitsPrinter<DominatorTree, false> {
static char ID;
- DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", &ID) {}
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {}
};
struct DomOnlyPrinter
: public DOTGraphTraitsPrinter<DominatorTree, true> {
static char ID;
- DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", &ID) {}
+ DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {}
};
struct PostDomPrinter
: public DOTGraphTraitsPrinter<PostDominatorTree, false> {
static char ID;
PostDomPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", &ID) {}
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {}
};
struct PostDomOnlyPrinter
: public DOTGraphTraitsPrinter<PostDominatorTree, true> {
static char ID;
PostDomOnlyPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", &ID) {}
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {}
};
} // end anonymous namespace
char DomPrinter::ID = 0;
-RegisterPass<DomPrinter> E("dot-dom",
- "Print dominance tree of function "
- "to 'dot' file");
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+ "Print dominance tree of function to 'dot' file",
+ false, false);
char DomOnlyPrinter::ID = 0;
-RegisterPass<DomOnlyPrinter> F("dot-dom-only",
- "Print dominance tree of function "
- "to 'dot' file "
- "(with no function bodies)");
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+ "Print dominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false);
char PostDomPrinter::ID = 0;
-RegisterPass<PostDomPrinter> G("dot-postdom",
- "Print postdominance tree of function "
- "to 'dot' file");
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file",
+ false, false);
char PostDomOnlyPrinter::ID = 0;
-RegisterPass<PostDomOnlyPrinter> H("dot-postdom-only",
- "Print postdominance tree of function "
- "to 'dot' file "
- "(with no function bodies)");
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+ "Print postdominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false);
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 65c7c6efd802..b3635283fda5 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -42,7 +42,7 @@ class BasicCallGraph : public ModulePass, public CallGraph {
public:
static char ID; // Class identification, replacement for typeinfo
- BasicCallGraph() : ModulePass(&ID), Root(0),
+ BasicCallGraph() : ModulePass(ID), Root(0),
ExternalCallingNode(0), CallsExternalNode(0) {}
// runOnModule - Compute the call graph for the specified module.
@@ -86,8 +86,8 @@ public:
/// an analysis interface through multiple inheritance. If needed, it should
/// override this to adjust the this pointer as needed for the specified pass
/// info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&CallGraph::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &CallGraph::ID)
return (CallGraph*)this;
return this;
}
@@ -145,8 +145,8 @@ private:
for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
- CallSite CS = CallSite::get(II);
- if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(II)) {
+ CallSite CS(cast<Value>(II));
+ if (CS && !isa<DbgInfoIntrinsic>(II)) {
const Function *Callee = CS.getCalledFunction();
if (Callee)
Node->addCalledFunction(CS, getOrInsertFunction(Callee));
@@ -172,9 +172,8 @@ private:
} //End anonymous namespace
static RegisterAnalysisGroup<CallGraph> X("Call Graph");
-static RegisterPass<BasicCallGraph>
-Y("basiccg", "Basic CallGraph Construction", false, true);
-static RegisterAnalysisGroup<CallGraph, true> Z(Y);
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+ "Basic CallGraph Construction", false, true, true);
char CallGraph::ID = 0;
char BasicCallGraph::ID = 0;
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 0c01ee5b8284..b7a27cb288d9 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -45,7 +45,7 @@ class CGPassManager : public ModulePass, public PMDataManager {
public:
static char ID;
explicit CGPassManager(int Depth)
- : ModulePass(&ID), PMDataManager(Depth) { }
+ : ModulePass(ID), PMDataManager(Depth) { }
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
@@ -209,7 +209,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// If the call edge is not from a call or invoke, then the function
// pass RAUW'd a call with another value. This can happen when
// constant folding happens of well known functions etc.
- CallSite::get(I->first).getInstruction() == 0) {
+ !CallSite(I->first)) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -245,8 +245,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- CallSite CS = CallSite::get(I);
- if (!CS.getInstruction() || isa<DbgInfoIntrinsic>(I)) continue;
+ CallSite CS(cast<Value>(I));
+ if (!CS || isa<DbgInfoIntrinsic>(I)) continue;
// If this call site already existed in the callgraph, just verify it
// matches up to expectations and remove it from CallSites.
@@ -582,9 +582,9 @@ namespace {
public:
static char ID;
- PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {}
+ PrintCallGraphPass() : CallGraphSCCPass(ID), Out(dbgs()) {}
PrintCallGraphPass(const std::string &B, raw_ostream &o)
- : CallGraphSCCPass(&ID), Banner(B), Out(o) {}
+ : CallGraphSCCPass(ID), Banner(B), Out(o) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index c4fb0b9a4e3d..8eed9d6f68bc 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -23,8 +23,8 @@
using namespace llvm;
char FindUsedTypes::ID = 0;
-static RegisterPass<FindUsedTypes>
-X("print-used-types", "Find Used Types", false, true);
+INITIALIZE_PASS(FindUsedTypes, "print-used-types",
+ "Find Used Types", false, true);
// IncorporateType - Incorporate one type and all of its subtypes into the
// collection of used types.
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index f13deea41d4e..6759b0afdce3 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -47,14 +47,15 @@ namespace {
/// GlobalInfo - Maintain mod/ref info for all of the globals without
/// addresses taken that are read or written (transitively) by this
/// function.
- std::map<GlobalValue*, unsigned> GlobalInfo;
+ std::map<const GlobalValue*, unsigned> GlobalInfo;
/// MayReadAnyGlobal - May read global variables, but it is not known which.
bool MayReadAnyGlobal;
- unsigned getInfoForGlobal(GlobalValue *GV) const {
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
- std::map<GlobalValue*, unsigned>::const_iterator I = GlobalInfo.find(GV);
+ std::map<const GlobalValue*, unsigned>::const_iterator I =
+ GlobalInfo.find(GV);
if (I != GlobalInfo.end())
Effect |= I->second;
return Effect;
@@ -71,23 +72,23 @@ namespace {
class GlobalsModRef : public ModulePass, public AliasAnalysis {
/// NonAddressTakenGlobals - The globals that do not have their addresses
/// taken.
- std::set<GlobalValue*> NonAddressTakenGlobals;
+ std::set<const GlobalValue*> NonAddressTakenGlobals;
/// IndirectGlobals - The memory pointed to by this global is known to be
/// 'owned' by the global.
- std::set<GlobalValue*> IndirectGlobals;
+ std::set<const GlobalValue*> IndirectGlobals;
/// AllocsForIndirectGlobals - If an instruction allocates memory for an
/// indirect global, this map indicates which one.
- std::map<Value*, GlobalValue*> AllocsForIndirectGlobals;
+ std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
/// FunctionInfo - For each function, keep track of what globals are
/// modified or read.
- std::map<Function*, FunctionRecord> FunctionInfo;
+ std::map<const Function*, FunctionRecord> FunctionInfo;
public:
static char ID;
- GlobalsModRef() : ModulePass(&ID) {}
+ GlobalsModRef() : ModulePass(ID) {}
bool runOnModule(Module &M) {
InitializeAliasAnalysis(this); // set up super class
@@ -107,39 +108,39 @@ namespace {
//
AliasResult alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
}
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
/// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(Function *F,
- std::vector<PointerAccessInfo> *Info) {
+ ModRefBehavior getModRefBehavior(const Function *F) {
if (FunctionRecord *FR = getFunctionInfo(F)) {
if (FR->FunctionEffect == 0)
return DoesNotAccessMemory;
else if ((FR->FunctionEffect & Mod) == 0)
return OnlyReadsMemory;
}
- return AliasAnalysis::getModRefBehavior(F, Info);
+ return AliasAnalysis::getModRefBehavior(F);
}
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
/// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(CallSite CS,
- std::vector<PointerAccessInfo> *Info) {
- Function* F = CS.getCalledFunction();
- if (!F) return AliasAnalysis::getModRefBehavior(CS, Info);
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ const Function* F = CS.getCalledFunction();
+ if (!F) return AliasAnalysis::getModRefBehavior(CS);
if (FunctionRecord *FR = getFunctionInfo(F)) {
if (FR->FunctionEffect == 0)
return DoesNotAccessMemory;
else if ((FR->FunctionEffect & Mod) == 0)
return OnlyReadsMemory;
}
- return AliasAnalysis::getModRefBehavior(CS, Info);
+ return AliasAnalysis::getModRefBehavior(CS);
}
virtual void deleteValue(Value *V);
@@ -149,8 +150,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -158,8 +159,9 @@ namespace {
private:
/// getFunctionInfo - Return the function info for the function, or null if
/// we don't have anything useful to say about it.
- FunctionRecord *getFunctionInfo(Function *F) {
- std::map<Function*, FunctionRecord>::iterator I = FunctionInfo.find(F);
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function*, FunctionRecord>::iterator I =
+ FunctionInfo.find(F);
if (I != FunctionInfo.end())
return &I->second;
return 0;
@@ -175,9 +177,9 @@ namespace {
}
char GlobalsModRef::ID = 0;
-static RegisterPass<GlobalsModRef>
-X("globalsmodref-aa", "Simple mod/ref analysis for globals", false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false);
Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
@@ -409,7 +411,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
FunctionEffect |= CalleeFR->FunctionEffect;
// Incorporate callee's effects on globals into our info.
- for (std::map<GlobalValue*, unsigned>::iterator GI =
+ for (std::map<const GlobalValue*, unsigned>::iterator GI =
CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
GI != E; ++GI)
FR.GlobalInfo[GI->first] |= GI->second;
@@ -477,13 +479,13 @@ AliasAnalysis::AliasResult
GlobalsModRef::alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size) {
// Get the base object these pointers point to.
- Value *UV1 = const_cast<Value*>(V1->getUnderlyingObject());
- Value *UV2 = const_cast<Value*>(V2->getUnderlyingObject());
+ const Value *UV1 = V1->getUnderlyingObject();
+ const Value *UV2 = V2->getUnderlyingObject();
// If either of the underlying values is a global, they may be non-addr-taken
// globals, which we can answer queries about.
- GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
- GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
if (GV1 || GV2) {
// If the global's address is taken, pretend we don't know it's a pointer to
// the global.
@@ -503,12 +505,12 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
// so, we may be able to handle this. First check to see if the base pointer
// is a direct load from an indirect global.
GV1 = GV2 = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
if (IndirectGlobals.count(GV))
GV1 = GV;
- if (LoadInst *LI = dyn_cast<LoadInst>(UV2))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
if (IndirectGlobals.count(GV))
GV2 = GV;
@@ -530,16 +532,17 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
}
AliasAnalysis::ModRefResult
-GlobalsModRef::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
unsigned Known = ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
if (GV->hasLocalLinkage())
- if (Function *F = CS.getCalledFunction())
+ if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
- if (FunctionRecord *FR = getFunctionInfo(F))
+ if (const FunctionRecord *FR = getFunctionInfo(F))
Known = FR->getInfoForGlobal(GV);
if (Known == NoModRef)
@@ -558,7 +561,7 @@ void GlobalsModRef::deleteValue(Value *V) {
// any AllocRelatedValues for it.
if (IndirectGlobals.erase(GV)) {
// Remove any entries in AllocsForIndirectGlobals for this global.
- for (std::map<Value*, GlobalValue*>::iterator
+ for (std::map<const Value*, const GlobalValue*>::iterator
I = AllocsForIndirectGlobals.begin(),
E = AllocsForIndirectGlobals.end(); I != E; ) {
if (I->second == GV) {
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 2c997dae5859..cdf667ad6eed 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,7 +21,6 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Assembly/AsmAnnotationWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -29,8 +28,7 @@
using namespace llvm;
char IVUsers::ID = 0;
-static RegisterPass<IVUsers>
-X("iv-users", "Induction Variable Users", false, true);
+INITIALIZE_PASS(IVUsers, "iv-users", "Induction Variable Users", false, true);
Pass *llvm::createIVUsersPass() {
return new IVUsers();
@@ -39,27 +37,31 @@ Pass *llvm::createIVUsersPass() {
/// isInteresting - Test whether the given expression is "interesting" when
/// used by the given expression, within the context of analyzing the
/// given loop.
-static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) {
- // Anything loop-invariant is interesting.
- if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L))
- return true;
-
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
+ ScalarEvolution *SE) {
// An addrec is interesting if it's affine or if it has an interesting start.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Keep things simple. Don't touch loop-variant strides.
if (AR->getLoop() == L)
return AR->isAffine() || !L->contains(I);
- // Otherwise recurse to see if the start value is interesting.
- return isInteresting(AR->getStart(), I, L);
+ // Otherwise recurse to see if the start value is interesting, and that
+ // the step value is not interesting, since we don't yet know how to
+ // do effective SCEV expansions for addrecs with interesting steps.
+ return isInteresting(AR->getStart(), I, L, SE) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE);
}
- // An add is interesting if any of its operands is.
+ // An add is interesting if exactly one of its operands is interesting.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ bool AnyInterestingYet = false;
for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
OI != OE; ++OI)
- if (isInteresting(*OI, I, L))
- return true;
- return false;
+ if (isInteresting(*OI, I, L, SE)) {
+ if (AnyInterestingYet)
+ return false;
+ AnyInterestingYet = true;
+ }
+ return AnyInterestingYet;
}
// Nothing else is interesting here.
@@ -85,7 +87,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// If we've come to an uninteresting expression, stop the traversal and
// call this a user.
- if (!isInteresting(ISE, I, L))
+ if (!isInteresting(ISE, I, L, SE))
return false;
SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -141,7 +143,7 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
}
IVUsers::IVUsers()
- : LoopPass(&ID) {
+ : LoopPass(ID) {
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -176,9 +178,6 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
}
OS << ":\n";
- // Use a default AssemblyAnnotationWriter to suppress the default info
- // comments, which aren't relevant here.
- AssemblyAnnotationWriter Annotator;
for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
E = IVUses.end(); UI != E; ++UI) {
OS << " ";
@@ -192,7 +191,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << ")";
}
OS << " in ";
- UI->getUser()->print(OS, &Annotator);
+ UI->getUser()->print(OS);
OS << '\n';
}
}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index b1df517c2a94..3e550f35c255 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -152,14 +152,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
if (isa<DbgInfoIntrinsic>(II))
continue; // Debug intrinsics don't count as size.
-
- CallSite CS = CallSite::get(const_cast<Instruction*>(&*II));
-
+
+ ImmutableCallSite CS(cast<Instruction>(II));
+
// If this function contains a call to setjmp or _setjmp, never inline
// it. This is a hack because we depend on the user marking their local
// variables as volatile if they are live across a setjmp call, and they
// probably won't do this in callers.
- if (Function *F = CS.getCalledFunction()) {
+ if (const Function *F = CS.getCalledFunction()) {
if (F->isDeclaration() &&
(F->getName() == "setjmp" || F->getName() == "_setjmp"))
callsSetJmp = true;
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index bb2cf53c85ef..dcbcac005a2f 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -51,7 +51,7 @@ namespace {
}
public:
static char ID; // Pass identification, replacement for typeid
- InstCount() : FunctionPass(&ID) {}
+ InstCount() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -64,8 +64,8 @@ namespace {
}
char InstCount::ID = 0;
-static RegisterPass<InstCount>
-X("instcount", "Counts the various types of Instructions", false, true);
+INITIALIZE_PASS(InstCount, "instcount",
+ "Counts the various types of Instructions", false, true);
FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index 1f17b77a5b96..1c9e14884316 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -16,8 +16,8 @@
using namespace llvm;
char IntervalPartition::ID = 0;
-static RegisterPass<IntervalPartition>
-X("intervals", "Interval Partition Construction", true, true);
+INITIALIZE_PASS(IntervalPartition, "intervals",
+ "Interval Partition Construction", true, true);
//===----------------------------------------------------------------------===//
// IntervalPartition Implementation
@@ -91,7 +91,7 @@ bool IntervalPartition::runOnFunction(Function &F) {
// distinguish it from a copy constructor. Always pass in false for now.
//
IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
- : FunctionPass(&ID) {
+ : FunctionPass(ID) {
assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
// Pass false to intervals_begin because we take ownership of it's memory
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index ff9026bede97..e32dbc444713 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -19,16 +19,18 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
char LazyValueInfo::ID = 0;
-static RegisterPass<LazyValueInfo>
-X("lazy-value-info", "Lazy Value Information Analysis", false, true);
+INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true);
namespace llvm {
FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
@@ -50,12 +52,15 @@ class LVILatticeVal {
enum LatticeValueTy {
/// undefined - This LLVM Value has no known value yet.
undefined,
+
/// constant - This LLVM Value has a specific constant value.
constant,
-
/// notconstant - This LLVM value is known to not have the specified value.
notconstant,
+ /// constantrange
+ constantrange,
+
/// overdefined - This instruction is not known to be constant, and we know
/// it has a value.
overdefined
@@ -63,42 +68,62 @@ class LVILatticeVal {
/// Val: This stores the current lattice value along with the Constant* for
/// the constant if this is a 'constant' or 'notconstant' value.
- PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+ LatticeValueTy Tag;
+ Constant *Val;
+ ConstantRange Range;
public:
- LVILatticeVal() : Val(0, undefined) {}
+ LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
static LVILatticeVal get(Constant *C) {
LVILatticeVal Res;
- Res.markConstant(C);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ Res.markConstantRange(ConstantRange(CI->getValue(), CI->getValue()+1));
+ else if (!isa<UndefValue>(C))
+ Res.markConstant(C);
return Res;
}
static LVILatticeVal getNot(Constant *C) {
LVILatticeVal Res;
- Res.markNotConstant(C);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ Res.markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ else
+ Res.markNotConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getRange(ConstantRange CR) {
+ LVILatticeVal Res;
+ Res.markConstantRange(CR);
return Res;
}
- bool isUndefined() const { return Val.getInt() == undefined; }
- bool isConstant() const { return Val.getInt() == constant; }
- bool isNotConstant() const { return Val.getInt() == notconstant; }
- bool isOverdefined() const { return Val.getInt() == overdefined; }
+ bool isUndefined() const { return Tag == undefined; }
+ bool isConstant() const { return Tag == constant; }
+ bool isNotConstant() const { return Tag == notconstant; }
+ bool isConstantRange() const { return Tag == constantrange; }
+ bool isOverdefined() const { return Tag == overdefined; }
Constant *getConstant() const {
assert(isConstant() && "Cannot get the constant of a non-constant!");
- return Val.getPointer();
+ return Val;
}
Constant *getNotConstant() const {
assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
- return Val.getPointer();
+ return Val;
+ }
+
+ ConstantRange getConstantRange() const {
+ assert(isConstantRange() &&
+ "Cannot get the constant-range of a non-constant-range!");
+ return Range;
}
/// markOverdefined - Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
return false;
- Val.setInt(overdefined);
+ Tag = overdefined;
return true;
}
@@ -110,9 +135,9 @@ public:
}
assert(isUndefined());
- Val.setInt(constant);
+ Tag = constant;
assert(V && "Marking constant with NULL");
- Val.setPointer(V);
+ Val = V;
return true;
}
@@ -128,9 +153,29 @@ public:
else
assert(isUndefined());
- Val.setInt(notconstant);
+ Tag = notconstant;
assert(V && "Marking constant with NULL");
- Val.setPointer(V);
+ Val = V;
+ return true;
+ }
+
+ /// markConstantRange - Return true if this is a change in status.
+ bool markConstantRange(const ConstantRange NewR) {
+ if (isConstantRange()) {
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ bool changed = Range == NewR;
+ Range = NewR;
+ return changed;
+ }
+
+ assert(isUndefined());
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ Tag = constantrange;
+ Range = NewR;
return true;
}
@@ -147,20 +192,39 @@ public:
isa<ConstantExpr>(RHS.getNotConstant()))
return markOverdefined();
return false;
- }
- if (isConstant()) {
+ } else if (isConstant()) {
if (getConstant() == RHS.getNotConstant() ||
isa<ConstantExpr>(RHS.getNotConstant()) ||
isa<ConstantExpr>(getConstant()))
return markOverdefined();
return markNotConstant(RHS.getNotConstant());
+ } else if (isConstantRange()) {
+ return markOverdefined();
}
assert(isUndefined() && "Unexpected lattice");
return markNotConstant(RHS.getNotConstant());
}
+ if (RHS.isConstantRange()) {
+ if (isConstantRange()) {
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ else
+ return markConstantRange(NewR);
+ } else if (!isUndefined()) {
+ return markOverdefined();
+ }
+
+ assert(isUndefined() && "Unexpected lattice");
+ return markConstantRange(RHS.getConstantRange());
+ }
+
// RHS must be a constant, we must be undef, constant, or notconstant.
+ assert(!isConstantRange() &&
+ "Constant and ConstantRange cannot be merged.");
+
if (isUndefined())
return markConstant(RHS.getConstant());
@@ -191,6 +255,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
if (Val.isNotConstant())
return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ else if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << '>';
return OS << "constant<" << *Val.getConstant() << '>';
}
}
@@ -206,17 +273,41 @@ namespace {
public:
/// BlockCacheEntryTy - This is a computed lattice value at the end of the
/// specified basic block for a Value* that depends on context.
- typedef std::pair<BasicBlock*, LVILatticeVal> BlockCacheEntryTy;
+ typedef std::pair<AssertingVH<BasicBlock>, LVILatticeVal> BlockCacheEntryTy;
/// ValueCacheEntryTy - This is all of the cached block information for
/// exactly one Value*. The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
- typedef std::vector<BlockCacheEntryTy> ValueCacheEntryTy;
+ typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
private:
+ /// LVIValueHandle - A callback value handle update the cache when
+ /// values are erased.
+ struct LVIValueHandle : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted();
+ void allUsesReplacedWith(Value* V) {
+ deleted();
+ }
+
+ LVIValueHandle &operator=(Value *V) {
+ return *this = LVIValueHandle(V, Parent);
+ }
+ };
+
/// ValueCache - This is all of the cached information for all values,
/// mapped from Value* to key information.
- DenseMap<Value*, ValueCacheEntryTy> ValueCache;
+ std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// OverDefinedCache - This tracks, on a per-block basis, the set of
+ /// values that are over-defined at the end of that block. This is required
+ /// for cache updating.
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> > OverDefinedCache;
+
public:
/// getValueInBlock - This is the query interface to determine the lattice
@@ -226,29 +317,23 @@ namespace {
/// getValueOnEdge - This is the query interface to determine the lattice
/// value for the specified Value* that is true on the specified edge.
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
- };
-} // end anonymous namespace
-
-namespace {
- struct BlockCacheEntryComparator {
- static int Compare(const void *LHSv, const void *RHSv) {
- const LazyValueInfoCache::BlockCacheEntryTy *LHS =
- static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(LHSv);
- const LazyValueInfoCache::BlockCacheEntryTy *RHS =
- static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(RHSv);
- if (LHS->first < RHS->first)
- return -1;
- if (LHS->first > RHS->first)
- return 1;
- return 0;
- }
- bool operator()(const LazyValueInfoCache::BlockCacheEntryTy &LHS,
- const LazyValueInfoCache::BlockCacheEntryTy &RHS) const {
- return LHS.first < RHS.first;
+ /// threadEdge - This is the update interface to inform the cache that an
+ /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+ /// NewSucc.
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ /// eraseBlock - This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// clear - Empty the cache.
+ void clear() {
+ ValueCache.clear();
+ OverDefinedCache.clear();
}
};
-}
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// LVIQuery Impl
@@ -267,78 +352,87 @@ namespace {
/// This is the current value being queried for.
Value *Val;
+ /// This is a pointer to the owning cache, for recursive queries.
+ LazyValueInfoCache &Parent;
+
/// This is all of the cached information about this value.
ValueCacheEntryTy &Cache;
+ /// This tracks, for each block, what values are overdefined.
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &OverDefinedCache;
+
/// NewBlocks - This is a mapping of the new BasicBlocks which have been
/// added to cache but that are not in sorted order.
- DenseMap<BasicBlock*, LVILatticeVal> NewBlockInfo;
+ DenseSet<BasicBlock*> NewBlockInfo;
+
public:
- LVIQuery(Value *V, ValueCacheEntryTy &VC) : Val(V), Cache(VC) {
+ LVIQuery(Value *V, LazyValueInfoCache &P,
+ ValueCacheEntryTy &VC,
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &ODC)
+ : Val(V), Parent(P), Cache(VC), OverDefinedCache(ODC) {
}
~LVIQuery() {
// When the query is done, insert the newly discovered facts into the
// cache in sorted order.
if (NewBlockInfo.empty()) return;
-
- // Grow the cache to exactly fit the new data.
- Cache.reserve(Cache.size() + NewBlockInfo.size());
- // If we only have one new entry, insert it instead of doing a full-on
- // sort.
- if (NewBlockInfo.size() == 1) {
- BlockCacheEntryTy Entry = *NewBlockInfo.begin();
- ValueCacheEntryTy::iterator I =
- std::lower_bound(Cache.begin(), Cache.end(), Entry,
- BlockCacheEntryComparator());
- assert((I == Cache.end() || I->first != Entry.first) &&
- "Entry already in map!");
-
- Cache.insert(I, Entry);
- return;
+ for (DenseSet<BasicBlock*>::iterator I = NewBlockInfo.begin(),
+ E = NewBlockInfo.end(); I != E; ++I) {
+ if (Cache[*I].isOverdefined())
+ OverDefinedCache.insert(std::make_pair(*I, Val));
}
-
- // TODO: If we only have two new elements, INSERT them both.
-
- Cache.insert(Cache.end(), NewBlockInfo.begin(), NewBlockInfo.end());
- array_pod_sort(Cache.begin(), Cache.end(),
- BlockCacheEntryComparator::Compare);
-
}
LVILatticeVal getBlockValue(BasicBlock *BB);
LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB);
private:
- LVILatticeVal &getCachedEntryForBlock(BasicBlock *BB);
+ LVILatticeVal getCachedEntryForBlock(BasicBlock *BB);
};
} // end anonymous namespace
-/// getCachedEntryForBlock - See if we already have a value for this block. If
-/// so, return it, otherwise create a new entry in the NewBlockInfo map to use.
-LVILatticeVal &LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
-
- // Do a binary search to see if we already have an entry for this block in
- // the cache set. If so, find it.
- if (!Cache.empty()) {
- ValueCacheEntryTy::iterator Entry =
- std::lower_bound(Cache.begin(), Cache.end(),
- BlockCacheEntryTy(BB, LVILatticeVal()),
- BlockCacheEntryComparator());
- if (Entry != Cache.end() && Entry->first == BB)
- return Entry->second;
+void LazyValueInfoCache::LVIValueHandle::deleted() {
+ for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+ I = Parent->OverDefinedCache.begin(),
+ E = Parent->OverDefinedCache.end();
+ I != E; ) {
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
+ ++I;
+ if (tmp->second == getValPtr())
+ Parent->OverDefinedCache.erase(tmp);
}
- // Otherwise, check to see if it's in NewBlockInfo or create a new entry if
- // not.
- return NewBlockInfo[BB];
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+ I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ) {
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
+ ++I;
+ if (tmp->first == BB)
+ OverDefinedCache.erase(tmp);
+ }
+
+ for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
+ I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ I->second.erase(BB);
+}
+
+/// getCachedEntryForBlock - See if we already have a value for this block. If
+/// so, return it, otherwise create a new entry in the Cache map to use.
+LVILatticeVal LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
+ NewBlockInfo.insert(BB);
+ return Cache[BB];
}
LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
// See if we already have a value for this block.
- LVILatticeVal &BBLV = getCachedEntryForBlock(BB);
+ LVILatticeVal BBLV = getCachedEntryForBlock(BB);
// If we've already computed this block's value, return it.
if (!BBLV.isUndefined()) {
@@ -350,13 +444,28 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
// lattice value to overdefined, so that cycles will terminate and be
// conservatively correct.
BBLV.markOverdefined();
+ Cache[BB] = BBLV;
- // If V is live into BB, see if our predecessors know anything about it.
Instruction *BBI = dyn_cast<Instruction>(Val);
if (BBI == 0 || BBI->getParent() != BB) {
LVILatticeVal Result; // Start Undefined.
- unsigned NumPreds = 0;
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
+ LoadInst *L = dyn_cast<LoadInst>(BI);
+ if (L && L->getPointerAddressSpace() == 0 &&
+ L->getPointerOperand()->getUnderlyingObject() ==
+ Val->getUnderlyingObject()) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+
+ unsigned NumPreds = 0;
// Loop over all of our predecessors, merging what we know from them into
// result.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -367,11 +476,19 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
if (Result.isOverdefined()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
return Result;
}
++NumPreds;
}
+
// If this is the entry block, we must be asking about an argument. The
// value is overdefined.
if (NumPreds == 0 && BB == &BB->getParent()->front()) {
@@ -382,24 +499,123 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined());
- return getCachedEntryForBlock(BB) = Result;
+ return Cache[BB] = Result;
}
// If this value is defined by an instruction in this block, we have to
// process it here somehow or return overdefined.
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- (void)PN;
- // TODO: PHI Translation in preds.
- } else {
+ LVILatticeVal Result; // Start Undefined.
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ Value* PhiVal = PN->getIncomingValueForBlock(*PI);
+ Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB));
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ return Result;
+ }
+ }
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ return Cache[BB] = Result;
}
-
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined because inst def found.\n");
+ assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?");
+
+ // We can only analyze the definitions of certain classes of instructions
+ // (integral binops and casts at the moment), so bail if this isn't one.
LVILatticeVal Result;
- Result.markOverdefined();
- return getCachedEntryForBlock(BB) = Result;
+ if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+ !BBI->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ return Result;
+ }
+
+ // FIXME: We're currently limited to binops with a constant RHS. This should
+ // be improved.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+ if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ Result.markOverdefined();
+ return Result;
+ }
+
+ // Figure out the range of the LHS. If that fails, bail.
+ LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB);
+ if (!LHSVal.isConstantRange()) {
+ Result.markOverdefined();
+ return Result;
+ }
+
+ ConstantInt *RHS = 0;
+ ConstantRange LHSRange = LHSVal.getConstantRange();
+ ConstantRange RHSRange(1);
+ const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ if (isa<BinaryOperator>(BBI)) {
+ RHS = dyn_cast<ConstantInt>(BBI->getOperand(1));
+ if (!RHS) {
+ Result.markOverdefined();
+ return Result;
+ }
+
+ RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1);
+ }
+
+ // NOTE: We're currently limited by the set of operations that ConstantRange
+ // can evaluate symbolically. Enhancing that set will allows us to analyze
+ // more definitions.
+ switch (BBI->getOpcode()) {
+ case Instruction::Add:
+ Result.markConstantRange(LHSRange.add(RHSRange));
+ break;
+ case Instruction::Sub:
+ Result.markConstantRange(LHSRange.sub(RHSRange));
+ break;
+ case Instruction::Mul:
+ Result.markConstantRange(LHSRange.multiply(RHSRange));
+ break;
+ case Instruction::UDiv:
+ Result.markConstantRange(LHSRange.udiv(RHSRange));
+ break;
+ case Instruction::Shl:
+ Result.markConstantRange(LHSRange.shl(RHSRange));
+ break;
+ case Instruction::LShr:
+ Result.markConstantRange(LHSRange.lshr(RHSRange));
+ break;
+ case Instruction::Trunc:
+ Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+ break;
+ case Instruction::SExt:
+ Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::ZExt:
+ Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::BitCast:
+ Result.markConstantRange(LHSRange);
+ break;
+
+ // Unhandled instructions are overdefined.
+ default:
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ break;
+ }
+
+ return Cache[BB] = Result;
}
@@ -420,28 +636,57 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
// it is.
if (BI->getCondition() == Val)
return LVILatticeVal::get(ConstantInt::get(
- Type::getInt1Ty(Val->getContext()), isTrueDest));
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if (ICI->isEquality() && ICI->getOperand(0) == Val &&
- isa<Constant>(ICI->getOperand(1))) {
+ ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (ICI && ICI->getOperand(0) == Val &&
+ isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality()) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
}
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ // Figure out the possible values of the query BEFORE this branch.
+ LVILatticeVal InBlock = getBlockValue(BBFrom);
+ if (!InBlock.isConstantRange())
+ return LVILatticeVal::getRange(TrueValues);
+
+ // Find all potential values that satisfy both the input and output
+ // conditions.
+ ConstantRange PossibleValues =
+ TrueValues.intersectWith(InBlock.getConstantRange());
+
+ return LVILatticeVal::getRange(PossibleValues);
+ }
+ }
}
}
// If the edge was formed by a switch on the value, then we may know exactly
// what it is.
if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
- // If BBTo is the default destination of the switch, we don't know anything.
- // Given a more powerful range analysis we could know stuff.
- if (SI->getCondition() == Val && SI->getDefaultDest() != BBTo) {
+ if (SI->getCondition() == Val) {
+ // We don't know anything in the default case.
+ if (SI->getDefaultDest() == BBTo) {
+ LVILatticeVal Result;
+ Result.markOverdefined();
+ return Result;
+ }
+
// We only know something if there is exactly one value that goes from
// BBFrom to BBTo.
unsigned NumEdges = 0;
@@ -474,7 +719,9 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
- LVILatticeVal Result = LVIQuery(V, ValueCache[V]).getBlockValue(BB);
+ LVILatticeVal Result = LVIQuery(V, *this,
+ ValueCache[LVIValueHandle(V, this)],
+ OverDefinedCache).getBlockValue(BB);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
@@ -488,24 +735,80 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
LVILatticeVal Result =
- LVIQuery(V, ValueCache[V]).getEdgeValue(FromBB, ToBB);
+ LVIQuery(V, *this, ValueCache[LVIValueHandle(V, this)],
+ OverDefinedCache).getEdgeValue(FromBB, ToBB);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be possible
+ // to solve now. We do NOT try to proactively update these values. Instead,
+ // we clear their entries from the cache, and allow lazy updating to recompute
+ // them when needed.
+
+ // The updating process is fairly simple: we need to dropped cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ DenseSet<Value*> ClearSet;
+ for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+ I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == OldSucc)
+ ClearSet.insert(I->second);
+ }
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ bool changed = false;
+ for (DenseSet<Value*>::iterator I = ClearSet.begin(),E = ClearSet.end();
+ I != E; ++I) {
+ // If a value was marked overdefined in OldSucc, and is here too...
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator OI =
+ OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ if (OI == OverDefinedCache.end()) continue;
+
+ // Remove it from the caches.
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+ assert(CI != Entry.end() && "Couldn't find entry to update?");
+ Entry.erase(CI);
+ OverDefinedCache.erase(OI);
+
+ // If we removed anything, then we potentially need to update
+ // blocks successors too.
+ changed = true;
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
//===----------------------------------------------------------------------===//
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-bool LazyValueInfo::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<TargetData>();
- // Fully lazy.
- return false;
-}
-
/// getCache - This lazily constructs the LazyValueInfoCache.
static LazyValueInfoCache &getCache(void *&PImpl) {
if (!PImpl)
@@ -513,6 +816,15 @@ static LazyValueInfoCache &getCache(void *&PImpl) {
return *static_cast<LazyValueInfoCache*>(PImpl);
}
+bool LazyValueInfo::runOnFunction(Function &F) {
+ if (PImpl)
+ getCache(PImpl).clear();
+
+ TD = getAnalysisIfAvailable<TargetData>();
+ // Fully lazy.
+ return false;
+}
+
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
@@ -526,6 +838,11 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
if (Result.isConstant())
return Result.getConstant();
+ else if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
return 0;
}
@@ -537,6 +854,11 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
if (Result.isConstant())
return Result.getConstant();
+ else if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
return 0;
}
@@ -557,6 +879,36 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
return Unknown;
}
+ if (Result.isConstantRange()) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return Unknown;
+
+ ConstantRange CR = Result.getConstantRange();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (!CR.contains(CI->getValue()))
+ return False;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return True;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (!CR.contains(CI->getValue()))
+ return True;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return False;
+ }
+
+ // Handle more complex predicates.
+ ConstantRange RHS(CI->getValue(), CI->getValue()+1);
+ ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS);
+ if (CR.intersectWith(TrueValues).isEmptySet())
+ return False;
+ else if (TrueValues.contains(CR))
+ return True;
+
+ return Unknown;
+ }
+
if (Result.isNotConstant()) {
// If this is an equality comparison, we can try to fold it knowing that
// "V != C1".
@@ -579,4 +931,11 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
return Unknown;
}
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock* NewSucc) {
+ if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+}
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+ if (PImpl) getCache(PImpl).eraseBlock(BB);
+}
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index 741965929890..7f51202ecb55 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -20,11 +20,8 @@ using namespace llvm;
// Register this pass...
char LibCallAliasAnalysis::ID = 0;
-static RegisterPass<LibCallAliasAnalysis>
-X("libcall-aa", "LibCall Alias Analysis", false, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
+ "LibCall Alias Analysis", false, true, false);
FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
return new LibCallAliasAnalysis(LCI);
@@ -46,7 +43,7 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
/// vs the specified pointer/size.
AliasAnalysis::ModRefResult
LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
- CallSite CS, Value *P,
+ ImmutableCallSite CS, const Value *P,
unsigned Size) {
// If we have a function, check to see what kind of mod/ref effects it
// has. Start by including any info globally known about the function.
@@ -120,13 +117,14 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
// specified memory object.
//
AliasAnalysis::ModRefResult
-LibCallAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
ModRefResult MRInfo = ModRef;
// If this is a direct call to a function that LCI knows about, get the
// information about the runtime function.
if (LCI) {
- if (Function *F = CS.getCalledFunction()) {
+ if (const Function *F = CS.getCalledFunction()) {
if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size));
if (MRInfo == NoModRef) return NoModRef;
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index e0060c3e89b1..81b0f46f3740 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -40,7 +40,8 @@ const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
/// the specified function if we have it. If not, return null.
-const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const {
+const LibCallFunctionInfo *
+LibCallInfo::getFunctionInfo(const Function *F) const {
StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
/// If this is the first time we are querying for this info, lazily construct
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 9f1b30d2cf45..a9d972435f5f 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -108,7 +108,7 @@ namespace {
raw_string_ostream MessagesStr;
static char ID; // Pass identification, replacement for typeid
- Lint() : FunctionPass(&ID), MessagesStr(Messages) {}
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {}
virtual bool runOnFunction(Function &F);
@@ -167,8 +167,7 @@ namespace {
}
char Lint::ID = 0;
-static RegisterPass<Lint>
-X("lint", "Statically lint-checks LLVM IR", false, true);
+INITIALIZE_PASS(Lint, "lint", "Statically lint-checks LLVM IR", false, true);
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, M) \
@@ -247,8 +246,7 @@ void Lint::visitCallSite(CallSite CS) {
// where nothing is known.
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
- Assert1(AI == BI ||
- AA->alias(*AI, ~0u, *BI, ~0u) != AliasAnalysis::MustAlias,
+ Assert1(AI == BI || AA->alias(*AI, *BI) != AliasAnalysis::MustAlias,
"Unusual: noalias argument aliases another argument", &I);
}
@@ -520,6 +518,9 @@ void Lint::visitVAArgInst(VAArgInst &I) {
void Lint::visitIndirectBrInst(IndirectBrInst &I) {
visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee);
+
+ Assert1(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 23964ffc457e..0225f4fa2548 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -22,10 +22,10 @@ namespace llvm {
}
char LiveValues::ID = 0;
-static RegisterPass<LiveValues>
-X("live-values", "Value Liveness Analysis", false, true);
+INITIALIZE_PASS(LiveValues, "live-values",
+ "Value Liveness Analysis", false, true);
-LiveValues::LiveValues() : FunctionPass(&ID) {}
+LiveValues::LiveValues() : FunctionPass(ID) {}
void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index e1019474cf43..82c02dcd1342 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -46,8 +46,8 @@ LoopPass *llvm::createLoopDependenceAnalysisPass() {
return new LoopDependenceAnalysis();
}
-static RegisterPass<LoopDependenceAnalysis>
-R("lda", "Loop Dependence Analysis", false, true);
+INITIALIZE_PASS(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true);
char LoopDependenceAnalysis::ID = 0;
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 818d0a9dd114..46219d1b6f55 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -38,8 +38,7 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
cl::desc("Verify loop info (time consuming)"));
char LoopInfo::ID = 0;
-static RegisterPass<LoopInfo>
-X("loops", "Natural Loop Information", true, true);
+INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true);
//===----------------------------------------------------------------------===//
// Loop implementation
@@ -124,14 +123,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
BasicBlock *H = getHeader();
BasicBlock *Incoming = 0, *Backedge = 0;
- typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits;
- InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(H);
- assert(PI != InvBlockTraits::child_end(H) &&
+ pred_iterator PI = pred_begin(H);
+ assert(PI != pred_end(H) &&
"Loop must have at least one backedge!");
Backedge = *PI++;
- if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop
+ if (PI == pred_end(H)) return 0; // dead loop
Incoming = *PI++;
- if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges?
+ if (PI != pred_end(H)) return 0; // multiple backedges?
if (contains(Incoming)) {
if (contains(Backedge))
@@ -157,18 +155,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return 0;
}
-/// getCanonicalInductionVariableIncrement - Return the LLVM value that holds
-/// the canonical induction variable value for the "next" iteration of the
-/// loop. This always succeeds if getCanonicalInductionVariable succeeds.
-///
-Instruction *Loop::getCanonicalInductionVariableIncrement() const {
- if (PHINode *PN = getCanonicalInductionVariable()) {
- bool P1InLoop = contains(PN->getIncomingBlock(1));
- return cast<Instruction>(PN->getIncomingValue(P1InLoop));
- }
- return 0;
-}
-
/// getTripCount - Return a loop-invariant LLVM value indicating the number of
/// times the loop will be executed. Note that this means that the backedge
/// of the loop executes N-1 times. If the trip-count cannot be determined,
@@ -180,12 +166,12 @@ Instruction *Loop::getCanonicalInductionVariableIncrement() const {
Value *Loop::getTripCount() const {
// Canonical loops will end with a 'cmp ne I, V', where I is the incremented
// canonical induction variable and V is the trip count of the loop.
- Instruction *Inc = getCanonicalInductionVariableIncrement();
- if (Inc == 0) return 0;
- PHINode *IV = cast<PHINode>(Inc->getOperand(0));
+ PHINode *IV = getCanonicalInductionVariable();
+ if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
- BasicBlock *BackedgeBlock =
- IV->getIncomingBlock(contains(IV->getIncomingBlock(1)));
+ bool P0InLoop = contains(IV->getIncomingBlock(0));
+ Value *Inc = IV->getIncomingValue(!P0InLoop);
+ BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
if (BI->isConditional()) {
@@ -341,16 +327,12 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
BasicBlock *current = *BI;
switchExitBlocks.clear();
- typedef GraphTraits<BasicBlock *> BlockTraits;
- typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits;
- for (BlockTraits::ChildIteratorType I =
- BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
- I != E; ++I) {
+ for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
// If block is inside the loop then it is not a exit block.
if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
continue;
- InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(*I);
+ pred_iterator PI = pred_begin(*I);
BasicBlock *firstPred = *PI;
// If current basic block is this exit block's first predecessor
@@ -363,8 +345,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
// If a terminator has more then two successors, for example SwitchInst,
// then it is possible that there are multiple edges from current block
// to one exit block.
- if (std::distance(BlockTraits::child_begin(current),
- BlockTraits::child_end(current)) <= 2) {
+ if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
ExitBlocks.push_back(*I);
continue;
}
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 2727d2f9465c..15d4db8f5f98 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -30,9 +30,9 @@ private:
public:
static char ID;
- PrintLoopPass() : LoopPass(&ID), Out(dbgs()) {}
+ PrintLoopPass() : LoopPass(ID), Out(dbgs()) {}
PrintLoopPass(const std::string &B, raw_ostream &o)
- : LoopPass(&ID), Banner(B), Out(o) {}
+ : LoopPass(ID), Banner(B), Out(o) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -59,7 +59,7 @@ char PrintLoopPass::ID = 0;
char LPPassManager::ID = 0;
LPPassManager::LPPassManager(int Depth)
- : FunctionPass(&ID), PMDataManager(Depth) {
+ : FunctionPass(ID), PMDataManager(Depth) {
skipThisLoop = false;
redoThisLoop = false;
LI = NULL;
@@ -183,7 +183,7 @@ void LPPassManager::redoLoop(Loop *L) {
void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
BasicBlock *To, Loop *L) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *LP = (LoopPass *)getContainedPass(Index);
+ LoopPass *LP = getContainedPass(Index);
LP->cloneBasicBlockAnalysis(From, To, L);
}
}
@@ -198,7 +198,7 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
}
}
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *LP = (LoopPass *)getContainedPass(Index);
+ LoopPass *LP = getContainedPass(Index);
LP->deleteAnalysisValue(V, L);
}
}
@@ -240,7 +240,7 @@ bool LPPassManager::runOnFunction(Function &F) {
I != E; ++I) {
Loop *L = *I;
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *P = (LoopPass*)getContainedPass(Index);
+ LoopPass *P = getContainedPass(Index);
Changed |= P->doInitialization(L, *this);
}
}
@@ -254,7 +254,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *P = (LoopPass*)getContainedPass(Index);
+ LoopPass *P = getContainedPass(Index);
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
CurrentLoop->getHeader()->getName());
@@ -320,7 +320,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// Finalization
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *P = (LoopPass *)getContainedPass(Index);
+ LoopPass *P = getContainedPass(Index);
Changed |= P->doFinalization();
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 1f54d740db9d..d18d5ce0ea4c 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -46,11 +46,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
-static RegisterPass<MemoryDependenceAnalysis> X("memdep",
- "Memory Dependence Analysis", false, true);
+INITIALIZE_PASS(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true);
MemoryDependenceAnalysis::MemoryDependenceAnalysis()
-: FunctionPass(&ID), PredCache(0) {
+: FunctionPass(ID), PredCache(0) {
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
}
@@ -120,33 +120,21 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
Pointer = CI->getArgOperand(0);
// calls to free() erase the entire structure
PointerSize = ~0ULL;
- } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ } else if (CallSite InstCS = cast<Value>(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
- CallSite InstCS = CallSite::get(Inst);
// If these two calls do not interfere, look past it.
switch (AA->getModRefInfo(CS, InstCS)) {
case AliasAnalysis::NoModRef:
- // If the two calls don't interact (e.g. InstCS is readnone) keep
- // scanning.
+ // If the two calls are the same, return InstCS as a Def, so that
+ // CS can be found redundant and eliminated.
+ if (isReadOnlyCall && InstCS.onlyReadsMemory() &&
+ CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ return MemDepResult::getDef(Inst);
+
+ // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // keep scanning.
continue;
- case AliasAnalysis::Ref:
- // If the two calls read the same memory locations and CS is a readonly
- // function, then we have two cases: 1) the calls may not interfere with
- // each other at all. 2) the calls may produce the same value. In case
- // #1 we want to ignore the values, in case #2, we want to return Inst
- // as a Def dependence. This allows us to CSE in cases like:
- // X = strlen(P);
- // memchr(...);
- // Y = strlen(P); // Y = X
- if (isReadOnlyCall) {
- if (CS.getCalledFunction() != 0 &&
- CS.getCalledFunction() == InstCS.getCalledFunction())
- return MemDepResult::getDef(Inst);
- // Ignore unrelated read/read call dependences.
- continue;
- }
- // FALL THROUGH
default:
return MemDepResult::getClobber(Inst);
}
@@ -196,8 +184,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// FIXME: This only considers queries directly on the invariant-tagged
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
- AliasAnalysis::AliasResult R =
- AA->alias(II->getArgOperand(2), ~0U, MemPtr, ~0U);
+ AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(2), MemPtr);
if (R == AliasAnalysis::MustAlias) {
InvariantTag = II->getArgOperand(0);
continue;
@@ -209,8 +196,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// FIXME: This only considers queries directly on the invariant-tagged
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
- AliasAnalysis::AliasResult R =
- AA->alias(II->getArgOperand(1), ~0U, MemPtr, ~0U);
+ AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(1), MemPtr);
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(II);
}
@@ -387,7 +373,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
break;
default:
- CallSite QueryCS = CallSite::get(QueryInst);
+ CallSite QueryCS(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
QueryParent);
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 556d4c8aab54..2cc1c2aa005c 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -30,7 +30,7 @@ namespace {
DebugInfoFinder Finder;
public:
static char ID; // Pass identification, replacement for typeid
- ModuleDebugInfoPrinter() : ModulePass(&ID) {}
+ ModuleDebugInfoPrinter() : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -42,9 +42,8 @@ namespace {
}
char ModuleDebugInfoPrinter::ID = 0;
-static RegisterPass<ModuleDebugInfoPrinter>
-X("module-debuginfo",
- "Decodes module-level debug info", false, true);
+INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+ "Decodes module-level debug info", false, true);
ModulePass *llvm::createModuleDebugInfoPrinterPass() {
return new ModuleDebugInfoPrinter();
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index 14df0b719879..07f46824700a 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -28,7 +28,7 @@
using namespace llvm;
char PointerTracking::ID = 0;
-PointerTracking::PointerTracking() : FunctionPass(&ID) {}
+PointerTracking::PointerTracking() : FunctionPass(ID) {}
bool PointerTracking::runOnFunction(Function &F) {
predCache.clear();
@@ -144,6 +144,55 @@ const SCEV *PointerTracking::computeAllocationCount(Value *P,
return SE->getCouldNotCompute();
}
+Value *PointerTracking::computeAllocationCountValue(Value *P, const Type *&Ty) const
+{
+ Value *V = P->stripPointerCasts();
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ Ty = AI->getAllocatedType();
+ // arraySize elements of type Ty.
+ return AI->getArraySize();
+ }
+
+ if (CallInst *CI = extractMallocCall(V)) {
+ Ty = getMallocAllocatedType(CI);
+ if (!Ty)
+ return 0;
+ Value *arraySize = getMallocArraySize(CI, TD);
+ if (!arraySize) {
+ Ty = Type::getInt8Ty(P->getContext());
+ return CI->getArgOperand(0);
+ }
+ // arraySize elements of type Ty.
+ return arraySize;
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (GV->hasDefinitiveInitializer()) {
+ Constant *C = GV->getInitializer();
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ Ty = ATy->getElementType();
+ return ConstantInt::get(Type::getInt32Ty(P->getContext()),
+ ATy->getNumElements());
+ }
+ }
+ Ty = cast<PointerType>(GV->getType())->getElementType();
+ return ConstantInt::get(Type::getInt32Ty(P->getContext()), 1);
+ //TODO: implement more tracking for globals
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(V)) {
+ CallSite CS(CI);
+ Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (F == reallocFunc) {
+ Ty = Type::getInt8Ty(P->getContext());
+ // realloc allocates arg1 bytes.
+ return CS.getArgument(1);
+ }
+ }
+
+ return 0;
+}
+
// Calculates the number of elements of type Ty allocated for P.
const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
const Type *Ty)
@@ -263,5 +312,5 @@ void PointerTracking::print(raw_ostream &OS, const Module* M) const {
}
}
-static RegisterPass<PointerTracking> X("pointertracking",
- "Track pointer bounds", false, true);
+INITIALIZE_PASS(PointerTracking, "pointertracking",
+ "Track pointer bounds", false, true);
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 7354afa181b2..cbe8d1867e4f 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -28,8 +28,8 @@ using namespace llvm;
char PostDominatorTree::ID = 0;
char PostDominanceFrontier::ID = 0;
-static RegisterPass<PostDominatorTree>
-F("postdomtree", "Post-Dominator Tree Construction", true, true);
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+ "Post-Dominator Tree Construction", true, true);
bool PostDominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
@@ -53,8 +53,8 @@ FunctionPass* llvm::createPostDomTree() {
// PostDominanceFrontier Implementation
//===----------------------------------------------------------------------===//
-static RegisterPass<PostDominanceFrontier>
-H("postdomfrontier", "Post-Dominance Frontier Construction", true, true);
+INITIALIZE_PASS(PostDominanceFrontier, "postdomfrontier",
+ "Post-Dominance Frontier Construction", true, true);
const DominanceFrontier::DomSetType &
PostDominanceFrontier::calculate(const PostDominatorTree &DT,
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index da4ce4769262..ecc0a1845307 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -39,7 +39,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
explicit ProfileEstimatorPass(const double execcount = 0)
- : FunctionPass(&ID), ExecCount(execcount) {
+ : FunctionPass(ID), ExecCount(execcount) {
if (execcount == 0) ExecCount = LoopWeight;
}
@@ -59,8 +59,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&ProfileInfo::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
return (ProfileInfo*)this;
return this;
}
@@ -72,13 +72,11 @@ namespace {
} // End of anonymous namespace
char ProfileEstimatorPass::ID = 0;
-static RegisterPass<ProfileEstimatorPass>
-X("profile-estimator", "Estimate profiling information", false, true);
-
-static RegisterAnalysisGroup<ProfileInfo> Y(X);
+INITIALIZE_AG_PASS(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false);
namespace llvm {
- const PassInfo *ProfileEstimatorPassID = &X;
+ char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
FunctionPass *createProfileEstimatorPass() {
return new ProfileEstimatorPass();
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 8d2712fd6e06..fc7f28662c01 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -1076,14 +1076,14 @@ raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, con
namespace {
struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
static char ID; // Class identification, replacement for typeinfo
- NoProfileInfo() : ImmutablePass(&ID) {}
+ NoProfileInfo() : ImmutablePass(ID) {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&ProfileInfo::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
return (ProfileInfo*)this;
return this;
}
@@ -1096,10 +1096,7 @@ namespace {
char NoProfileInfo::ID = 0;
// Register this pass...
-static RegisterPass<NoProfileInfo>
-X("no-profile", "No Profile Information", false, true);
-
-// Declare that we implement the ProfileInfo interface
-static RegisterAnalysisGroup<ProfileInfo, true> Y(X);
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
+ "No Profile Information", false, true, true);
ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 8ea4ecf54f98..d325b574e848 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -45,7 +45,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
explicit LoaderPass(const std::string &filename = "")
- : ModulePass(&ID), Filename(filename) {
+ : ModulePass(ID), Filename(filename) {
if (filename.empty()) Filename = ProfileInfoFilename;
}
@@ -67,8 +67,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&ProfileInfo::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
return (ProfileInfo*)this;
return this;
}
@@ -79,12 +79,10 @@ namespace {
} // End of anonymous namespace
char LoaderPass::ID = 0;
-static RegisterPass<LoaderPass>
-X("profile-loader", "Load profile information from llvmprof.out", false, true);
+INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
+ "Load profile information from llvmprof.out", false, true, false);
-static RegisterAnalysisGroup<ProfileInfo> Y(X);
-
-const PassInfo *llvm::ProfileLoaderPassID = &X;
+char &llvm::ProfileLoaderPassID = LoaderPass::ID;
ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 5d87e14a97b4..3f01b2d592bc 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -59,10 +59,10 @@ namespace llvm {
public:
static char ID; // Class identification, replacement for typeinfo
- explicit ProfileVerifierPassT () : FunctionPass(&ID) {
+ explicit ProfileVerifierPassT () : FunctionPass(ID) {
DisableAssertions = ProfileVerifierDisableAssertions;
}
- explicit ProfileVerifierPassT (bool da) : FunctionPass(&ID),
+ explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
DisableAssertions(da) {
}
@@ -366,8 +366,8 @@ namespace llvm {
char ProfileVerifierPassT<FType, BType>::ID = 0;
}
-static RegisterPass<ProfileVerifierPass>
-X("profile-verifier", "Verify profiling information", false, true);
+INITIALIZE_PASS(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true);
namespace llvm {
FunctionPass *createProfileVerifierPass() {
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 000000000000..abc057a773a9
--- /dev/null
+++ b/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,749 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+#define DEBUG_TYPE "region"
+#include "llvm/Support/Debug.h"
+
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+
+// Always verify if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyRegionInfo = true;
+#else
+static bool VerifyRegionInfo = false;
+#endif
+
+static cl::opt<bool,true>
+VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+//===----------------------------------------------------------------------===//
+/// PrintStyle - Print region in difference ways.
+enum PrintStyle { PrintNone, PrintBB, PrintRN };
+
+cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden,
+ cl::desc("style of printing regions"),
+ cl::values(
+ clEnumValN(PrintNone, "none", "print no details"),
+ clEnumValN(PrintBB, "bb", "print regions in detail with block_iterator"),
+ clEnumValN(PrintRN, "rn", "print regions in detail with element_iterator"),
+ clEnumValEnd));
+//===----------------------------------------------------------------------===//
+/// Region Implementation
+Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo,
+ DominatorTree *dt, Region *Parent)
+ : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {}
+
+Region::~Region() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator it = BBNodeMap.begin(),
+ ie = BBNodeMap.end(); it != ie; ++it)
+ delete it->second;
+
+ // Only clean the cache for this Region. Caches of child Regions will be
+ // cleaned when the child Regions are deleted.
+ BBNodeMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+}
+
+bool Region::contains(const BasicBlock *B) const {
+ BasicBlock *BB = const_cast<BasicBlock*>(B);
+
+ assert(DT->getNode(BB) && "BB not part of the dominance tree");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // Toplevel region.
+ if (!exit)
+ return true;
+
+ return (DT->dominates(entry, BB)
+ && !(DT->dominates(exit, BB) && DT->dominates(entry, exit)));
+}
+
+bool Region::contains(const Loop *L) const {
+ // BBs that are not part of any loop are element of the Loop
+ // described by the NULL pointer. This loop is not part of any region,
+ // except if the region describes the whole function.
+ if (L == 0)
+ return getExit() == 0;
+
+ if (!contains(L->getHeader()))
+ return false;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(),
+ BE = ExitingBlocks.end(); BI != BE; ++BI)
+ if (!contains(*BI))
+ return false;
+
+ return true;
+}
+
+Loop *Region::outermostLoopInRegion(Loop *L) const {
+ if (!contains(L))
+ return 0;
+
+ while (L && contains(L->getParentLoop())) {
+ L = L->getParentLoop();
+ }
+
+ return L;
+}
+
+Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
+ assert(LI && BB && "LI and BB cannot be null!");
+ Loop *L = LI->getLoopFor(BB);
+ return outermostLoopInRegion(L);
+}
+
+bool Region::isSimple() const {
+ bool isSimple = true;
+ bool found = false;
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // TopLevelRegion
+ if (!exit)
+ return false;
+
+ for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
+ ++PI) {
+ BasicBlock *Pred = *PI;
+ if (DT->getNode(Pred) && !contains(Pred)) {
+ if (found) {
+ isSimple = false;
+ break;
+ }
+ found = true;
+ }
+ }
+
+ found = false;
+
+ for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
+ ++PI)
+ if (contains(*PI)) {
+ if (found) {
+ isSimple = false;
+ break;
+ }
+ found = true;
+ }
+
+ return isSimple;
+}
+
+std::string Region::getNameStr() const {
+ std::string exitName;
+ std::string entryName;
+
+ if (getEntry()->getName().empty()) {
+ raw_string_ostream OS(entryName);
+
+ WriteAsOperand(OS, getEntry(), false);
+ entryName = OS.str();
+ } else
+ entryName = getEntry()->getNameStr();
+
+ if (getExit()) {
+ if (getExit()->getName().empty()) {
+ raw_string_ostream OS(exitName);
+
+ WriteAsOperand(OS, getExit(), false);
+ exitName = OS.str();
+ } else
+ exitName = getExit()->getNameStr();
+ } else
+ exitName = "<Function Return>";
+
+ return entryName + " => " + exitName;
+}
+
+void Region::verifyBBInRegion(BasicBlock *BB) const {
+ if (!contains(BB))
+ llvm_unreachable("Broken region found!");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (!contains(*SI) && exit != *SI)
+ llvm_unreachable("Broken region found!");
+
+ if (entry != BB)
+ for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI)
+ if (!contains(*SI))
+ llvm_unreachable("Broken region found!");
+}
+
+void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const {
+ BasicBlock *exit = getExit();
+
+ visited->insert(BB);
+
+ verifyBBInRegion(BB);
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (*SI != exit && visited->find(*SI) == visited->end())
+ verifyWalk(*SI, visited);
+}
+
+void Region::verifyRegion() const {
+ // Only do verification when user wants to, otherwise this expensive
+ // check will be invoked by PassManager.
+ if (!VerifyRegionInfo) return;
+
+ std::set<BasicBlock*> visited;
+ verifyWalk(getEntry(), &visited);
+}
+
+void Region::verifyRegionNest() const {
+ for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->verifyRegionNest();
+
+ verifyRegion();
+}
+
+Region::block_iterator Region::block_begin() {
+ return GraphTraits<FlatIt<Region*> >::nodes_begin(this);
+}
+
+Region::block_iterator Region::block_end() {
+ return GraphTraits<FlatIt<Region*> >::nodes_end(this);
+}
+
+Region::const_block_iterator Region::block_begin() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_begin(this);
+}
+
+Region::const_block_iterator Region::block_end() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_end(this);
+}
+
+Region::element_iterator Region::element_begin() {
+ return GraphTraits<Region*>::nodes_begin(this);
+}
+
+Region::element_iterator Region::element_end() {
+ return GraphTraits<Region*>::nodes_end(this);
+}
+
+Region::const_element_iterator Region::element_begin() const {
+ return GraphTraits<const Region*>::nodes_begin(this);
+}
+
+Region::const_element_iterator Region::element_end() const {
+ return GraphTraits<const Region*>::nodes_end(this);
+}
+
+Region* Region::getSubRegionNode(BasicBlock *BB) const {
+ Region *R = RI->getRegionFor(BB);
+
+ if (!R || R == this)
+ return 0;
+
+ // If we pass the BB out of this region, that means our code is broken.
+ assert(contains(R) && "BB not in current region!");
+
+ while (contains(R->getParent()) && R->getParent() != this)
+ R = R->getParent();
+
+ if (R->getEntry() != BB)
+ return 0;
+
+ return R;
+}
+
+RegionNode* Region::getBBNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+
+ BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
+
+ if (at != BBNodeMap.end())
+ return at->second;
+
+ RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
+ BBNodeMap.insert(std::make_pair(BB, NewNode));
+ return NewNode;
+}
+
+RegionNode* Region::getNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+ if (Region* Child = getSubRegionNode(BB))
+ return Child->getNode();
+
+ return getBBNode(BB);
+}
+
+void Region::transferChildrenTo(Region *To) {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ (*I)->parent = To;
+ To->children.push_back(*I);
+ }
+ children.clear();
+}
+
+void Region::addSubRegion(Region *SubRegion) {
+ assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+ SubRegion->parent = this;
+ // Set up the region node.
+ assert(std::find(children.begin(), children.end(), SubRegion) == children.end()
+ && "Node already exist!");
+ children.push_back(SubRegion);
+}
+
+
+Region *Region::removeSubRegion(Region *Child) {
+ assert(Child->parent == this && "Child is not a child of this region!");
+ Child->parent = 0;
+ RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+ assert(I != children.end() && "Region does not exit. Unable to remove.");
+ children.erase(children.begin()+(I-begin()));
+ return Child;
+}
+
+unsigned Region::getDepth() const {
+ unsigned Depth = 0;
+
+ for (Region *R = parent; R != 0; R = R->parent)
+ ++Depth;
+
+ return Depth;
+}
+
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
+ if (print_tree)
+ OS.indent(level*2) << "[" << level << "] " << getNameStr();
+ else
+ OS.indent(level*2) << getNameStr();
+
+ OS << "\n";
+
+
+ if (printStyle != PrintNone) {
+ OS.indent(level*2) << "{\n";
+ OS.indent(level*2 + 2);
+
+ if (printStyle == PrintBB) {
+ for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ","
+ } else if (printStyle == PrintRN) {
+ for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ",
+ }
+
+ OS << "\n";
+ }
+
+ if (print_tree)
+ for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->print(OS, print_tree, level+1);
+
+ if (printStyle != PrintNone)
+ OS.indent(level*2) << "} \n";
+}
+
+void Region::dump() const {
+ print(dbgs(), true, getDepth());
+}
+
+void Region::clearNodeCache() {
+ BBNodeMap.clear();
+ for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->clearNodeCache();
+}
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
+ BasicBlock *exit) const {
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(entry, P) && !DT->dominates(exit, P))
+ return false;
+ }
+ return true;
+}
+
+bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+ typedef DominanceFrontier::DomSetType DST;
+
+ DST *entrySuccs = &DF->find(entry)->second;
+
+ // Exit is the header of a loop that contains the entry. In this case,
+ // the dominance frontier must only contain the exit.
+ if (!DT->dominates(entry, exit)) {
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI)
+ if (*SI != exit && *SI != entry)
+ return false;
+
+ return true;
+ }
+
+ DST *exitSuccs = &DF->find(exit)->second;
+
+ // Do not allow edges leaving the region.
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI) {
+ if (*SI == exit || *SI == entry)
+ continue;
+ if (exitSuccs->find(*SI) == exitSuccs->end())
+ return false;
+ if (!isCommonDomFrontier(*SI, entry, exit))
+ return false;
+ }
+
+ // Do not allow edges pointing into the region.
+ for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
+ SI != SE; ++SI)
+ if (DT->properlyDominates(entry, *SI) && *SI != exit)
+ return false;
+
+
+ return true;
+}
+
+void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit,
+ BBtoBBMap *ShortCut) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ BBtoBBMap::iterator e = ShortCut->find(exit);
+
+ if (e == ShortCut->end())
+ // No further region at exit available.
+ (*ShortCut)[entry] = exit;
+ else {
+ // We found a region e that starts at exit. Therefore (entry, e->second)
+ // is also a region, that is larger than (entry, exit). Insert the
+ // larger one.
+ BasicBlock *BB = e->second;
+ (*ShortCut)[entry] = BB;
+ }
+}
+
+DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N,
+ BBtoBBMap *ShortCut) const {
+ BBtoBBMap::iterator e = ShortCut->find(N->getBlock());
+
+ if (e == ShortCut->end())
+ return N->getIDom();
+
+ return PDT->getNode(e->second)->getIDom();
+}
+
+bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ unsigned num_successors = succ_end(entry) - succ_begin(entry);
+
+ if (num_successors <= 1 && exit == *(succ_begin(entry)))
+ return true;
+
+ return false;
+}
+
+void RegionInfo::updateStatistics(Region *R) {
+ ++numRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple()) ++numSimpleRegions;
+}
+
+Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ if (isTrivialRegion(entry, exit))
+ return 0;
+
+ Region *region = new Region(entry, exit, this, DT);
+ BBtoRegion.insert(std::make_pair(entry, region));
+
+ #ifdef XDEBUG
+ region->verifyRegion();
+ #else
+ DEBUG(region->verifyRegion());
+ #endif
+
+ updateStatistics(region);
+ return region;
+}
+
+void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
+ assert(entry);
+
+ DomTreeNode *N = PDT->getNode(entry);
+
+ if (!N)
+ return;
+
+ Region *lastRegion= 0;
+ BasicBlock *lastExit = entry;
+
+ // As only a BasicBlock that postdominates entry can finish a region, walk the
+ // post dominance tree upwards.
+ while ((N = getNextPostDom(N, ShortCut))) {
+ BasicBlock *exit = N->getBlock();
+
+ if (!exit)
+ break;
+
+ if (isRegion(entry, exit)) {
+ Region *newRegion = createRegion(entry, exit);
+
+ if (lastRegion)
+ newRegion->addSubRegion(lastRegion);
+
+ lastRegion = newRegion;
+ lastExit = exit;
+ }
+
+ // This can never be a region, so stop the search.
+ if (!DT->dominates(entry, exit))
+ break;
+ }
+
+ // Tried to create regions from entry to lastExit. Next time take a
+ // shortcut from entry to lastExit.
+ if (lastExit != entry)
+ insertShortCut(entry, lastExit, ShortCut);
+}
+
+void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
+ BasicBlock *entry = &(F.getEntryBlock());
+ DomTreeNode *N = DT->getNode(entry);
+
+ // Iterate over the dominance tree in post order to start with the small
+ // regions from the bottom of the dominance tree. If the small regions are
+ // detected first, detection of bigger regions is faster, as we can jump
+ // over the small regions.
+ for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
+ ++FI) {
+ findRegionsWithEntry(FI->getBlock(), ShortCut);
+ }
+}
+
+Region *RegionInfo::getTopMostParent(Region *region) {
+ while (region->parent)
+ region = region->getParent();
+
+ return region;
+}
+
+void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
+ BasicBlock *BB = N->getBlock();
+
+ // Passed region exit
+ while (BB == region->getExit())
+ region = region->getParent();
+
+ BBtoRegionMap::iterator it = BBtoRegion.find(BB);
+
+ // This basic block is a start block of a region. It is already in the
+ // BBtoRegion relation. Only the child basic blocks have to be updated.
+ if (it != BBtoRegion.end()) {
+ Region *newRegion = it->second;;
+ region->addSubRegion(getTopMostParent(newRegion));
+ region = newRegion;
+ } else {
+ BBtoRegion[BB] = region;
+ }
+
+ for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
+ buildRegionsTree(*CI, region);
+}
+
+void RegionInfo::releaseMemory() {
+ BBtoRegion.clear();
+ if (TopLevelRegion)
+ delete TopLevelRegion;
+ TopLevelRegion = 0;
+}
+
+RegionInfo::RegionInfo() : FunctionPass(ID) {
+ TopLevelRegion = 0;
+}
+
+RegionInfo::~RegionInfo() {
+ releaseMemory();
+}
+
+void RegionInfo::Calculate(Function &F) {
+ // ShortCut a function where for every BB the exit of the largest region
+ // starting with BB is stored. These regions can be threated as single BBS.
+ // This improves performance on linear CFGs.
+ BBtoBBMap ShortCut;
+
+ scanForRegions(F, &ShortCut);
+ BasicBlock *BB = &F.getEntryBlock();
+ buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+}
+
+bool RegionInfo::runOnFunction(Function &F) {
+ releaseMemory();
+
+ DT = &getAnalysis<DominatorTree>();
+ PDT = &getAnalysis<PostDominatorTree>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+ updateStatistics(TopLevelRegion);
+
+ Calculate(F);
+
+ return false;
+}
+
+void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "Region tree:\n";
+ TopLevelRegion->print(OS, true, 0);
+ OS << "End region tree\n";
+}
+
+void RegionInfo::verifyAnalysis() const {
+ // Only do verification when user wants to, otherwise this expensive check
+ // will be invoked by PMDataManager::verifyPreservedAnalysis when
+ // a regionpass (marked PreservedAll) finish.
+ if (!VerifyRegionInfo) return;
+
+ TopLevelRegion->verifyRegionNest();
+}
+
+// Region pass manager support.
+Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
+ BBtoRegionMap::const_iterator I=
+ BBtoRegion.find(BB);
+ return I != BBtoRegion.end() ? I->second : 0;
+}
+
+Region *RegionInfo::operator[](BasicBlock *BB) const {
+ return getRegionFor(BB);
+}
+
+
+BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
+ BasicBlock *Exit = NULL;
+
+ while (true) {
+ // Get largest region that starts at BB.
+ Region *R = getRegionFor(BB);
+ while (R && R->getParent() && R->getParent()->getEntry() == BB)
+ R = R->getParent();
+
+ // Get the single exit of BB.
+ if (R && R->getEntry() == BB)
+ Exit = R->getExit();
+ else if (++succ_begin(BB) == succ_end(BB))
+ Exit = *succ_begin(BB);
+ else // No single exit exists.
+ return Exit;
+
+ // Get largest region that starts at Exit.
+ Region *ExitR = getRegionFor(Exit);
+ while (ExitR && ExitR->getParent()
+ && ExitR->getParent()->getEntry() == Exit)
+ ExitR = ExitR->getParent();
+
+ for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+ ++PI)
+ if (!R->contains(*PI) && !ExitR->contains(*PI))
+ break;
+
+ // This stops infinite cycles.
+ if (DT->dominates(Exit, BB))
+ break;
+
+ BB = Exit;
+ }
+
+ return Exit;
+}
+
+Region*
+RegionInfo::getCommonRegion(Region *A, Region *B) const {
+ assert (A && B && "One of the Regions is NULL");
+
+ if (A->contains(B)) return A;
+
+ while (!B->contains(A))
+ B = B->getParent();
+
+ return B;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
+ Region* ret = Regions.back();
+ Regions.pop_back();
+
+ for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
+ E = Regions.end(); I != E; ++I)
+ ret = getCommonRegion(ret, *I);
+
+ return ret;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
+ Region* ret = getRegionFor(BBs.back());
+ BBs.pop_back();
+
+ for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
+ E = BBs.end(); I != E; ++I)
+ ret = getCommonRegion(ret, getRegionFor(*I));
+
+ return ret;
+}
+
+char RegionInfo::ID = 0;
+INITIALIZE_PASS(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true);
+
+// Create methods available outside of this file, to use them
+// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
+// the link time optimization.
+
+namespace llvm {
+ FunctionPass *createRegionInfoPass() {
+ return new RegionInfo();
+ }
+}
+
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 000000000000..fee5c1bae976
--- /dev/null
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,186 @@
+//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegion - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+ cl::desc("Show only simple regions in the graphviz viewer"),
+ cl::Hidden,
+ cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+
+ return "Not implemented";
+ }
+};
+
+template<>
+struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+ static std::string getGraphName(RegionInfo *DT) {
+ return "Region Graph";
+ }
+
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
+ G->getTopLevelRegion());
+ }
+
+ // Print the cluster of the subregions. This groups the single basic blocks
+ // and adds a different background color for each group.
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+ unsigned depth = 0) {
+ raw_ostream &O = GW.getOStream();
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+ << " {\n";
+ O.indent(2 * (depth + 1)) << "label = \"\";\n";
+
+ if (!onlySimpleRegions || R->isSimple()) {
+ O.indent(2 * (depth + 1)) << "style = filled;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 1) << "\n";
+
+ } else {
+ O.indent(2 * (depth + 1)) << "style = solid;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 2) << "\n";
+ }
+
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ printRegionCluster(*RI, GW, depth + 1);
+
+ RegionInfo *RI = R->getRegionInfo();
+
+ for (Region::const_block_iterator BI = R->block_begin(),
+ BE = R->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>();
+ if (RI->getRegionFor(BB) == R)
+ O.indent(2 * (depth + 1)) << "Node"
+ << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
+ << ";\n";
+ }
+
+ O.indent(2 * depth) << "}\n";
+ }
+
+ static void addCustomGraphFeatures(const RegionInfo* RI,
+ GraphWriter<RegionInfo*> &GW) {
+ raw_ostream &O = GW.getOStream();
+ O << "\tcolorscheme = \"paired12\"\n";
+ printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+ }
+};
+} //end namespace llvm
+
+namespace {
+
+struct RegionViewer
+ : public DOTGraphTraitsViewer<RegionInfo, false> {
+ static char ID;
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){}
+};
+
+char RegionViewer::ID = 0;
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true);
+
+struct RegionOnlyViewer
+ : public DOTGraphTraitsViewer<RegionInfo, true> {
+ static char ID;
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID){}
+};
+
+char RegionOnlyViewer::ID = 0;
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true);
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, false> {
+ static char ID;
+ RegionPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {}
+};
+} //end anonymous namespace
+
+char RegionPrinter::ID = 0;
+INITIALIZE_PASS(RegionPrinter, "dot-regions",
+ "Print regions of function to 'dot' file", true, true);
+
+namespace {
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, true> {
+ static char ID;
+ RegionOnlyPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {}
+};
+
+}
+
+char RegionOnlyPrinter::ID = 0;
+INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file "
+ "(with no function bodies)",
+ true, true);
+
+FunctionPass* llvm::createRegionViewerPass() {
+ return new RegionViewer();
+}
+
+FunctionPass* llvm::createRegionOnlyViewerPass() {
+ return new RegionOnlyViewer();
+}
+
+FunctionPass* llvm::createRegionPrinterPass() {
+ return new RegionPrinter();
+}
+
+FunctionPass* llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
+}
+
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 413b3b47f92a..b892d85f9f4a 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -103,8 +103,8 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
"derived loop"),
cl::init(100));
-static RegisterPass<ScalarEvolution>
-R("scalar-evolution", "Scalar Evolution Analysis", false, true);
+INITIALIZE_PASS(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true);
char ScalarEvolution::ID = 0;
//===----------------------------------------------------------------------===//
@@ -251,28 +251,59 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const {
OS << "(";
for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
OS << **I;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << OpStr;
}
OS << ")";
}
bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (!getOperand(i)->dominates(BB, DT))
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->dominates(BB, DT))
return false;
- }
return true;
}
bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (!getOperand(i)->properlyDominates(BB, DT))
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->properlyDominates(BB, DT))
return false;
- }
return true;
}
+bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const {
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->isLoopInvariant(L))
+ return false;
+ return true;
+}
+
+// hasComputableLoopEvolution - N-ary expressions have computable loop
+// evolutions iff they have at least one operand that varies with the loop,
+// but that all varying operands are computable.
+bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const {
+ bool HasVarying = false;
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
+ const SCEV *S = *I;
+ if (!S->isLoopInvariant(L)) {
+ if (S->hasComputableLoopEvolution(L))
+ HasVarying = true;
+ else
+ return false;
+ }
+ }
+ return HasVarying;
+}
+
+bool SCEVNAryExpr::hasOperand(const SCEV *O) const {
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
+ const SCEV *S = *I;
+ if (O == S || S->hasOperand(O))
+ return true;
+ }
+ return false;
+}
+
bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
}
@@ -303,10 +334,14 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
if (QueryLoop->contains(L))
return false;
+ // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop.
+ if (L->contains(QueryLoop))
+ return true;
+
// This recurrence is variant w.r.t. QueryLoop if any of its operands
// are variant.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!getOperand(i)->isLoopInvariant(QueryLoop))
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->isLoopInvariant(QueryLoop))
return false;
// Otherwise it's loop-invariant.
@@ -337,12 +372,36 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
OS << ">";
}
+void SCEVUnknown::deleted() {
+ // Clear this SCEVUnknown from ValuesAtScopes.
+ SE->ValuesAtScopes.erase(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Release the value.
+ setValPtr(0);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from ValuesAtScopes.
+ SE->ValuesAtScopes.erase(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Update this SCEVUnknown to point to the new value. This is needed
+ // because there may still be outstanding SCEVs which still point to
+ // this SCEVUnknown.
+ setValPtr(New);
+}
+
bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
// All non-instruction values are loop invariant. All instructions are loop
// invariant if they are not contained in the specified loop.
// Instructions are never considered invariant in the function body
// (null loop) because they are defined within the "loop".
- if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Instruction *I = dyn_cast<Instruction>(getValue()))
return L && !L->contains(I);
return true;
}
@@ -360,11 +419,11 @@ bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
}
const Type *SCEVUnknown::getType() const {
- return V->getType();
+ return getValue()->getType();
}
bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -381,7 +440,7 @@ bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
}
bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -406,7 +465,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
}
bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -448,166 +507,183 @@ void SCEVUnknown::print(raw_ostream &OS) const {
}
// Otherwise just print it normally.
- WriteAsOperand(OS, V, false);
+ WriteAsOperand(OS, getValue(), false);
}
//===----------------------------------------------------------------------===//
// SCEV Utilities
//===----------------------------------------------------------------------===//
-static bool CompareTypes(const Type *A, const Type *B) {
- if (A->getTypeID() != B->getTypeID())
- return A->getTypeID() < B->getTypeID();
- if (const IntegerType *AI = dyn_cast<IntegerType>(A)) {
- const IntegerType *BI = cast<IntegerType>(B);
- return AI->getBitWidth() < BI->getBitWidth();
- }
- if (const PointerType *AI = dyn_cast<PointerType>(A)) {
- const PointerType *BI = cast<PointerType>(B);
- return CompareTypes(AI->getElementType(), BI->getElementType());
- }
- if (const ArrayType *AI = dyn_cast<ArrayType>(A)) {
- const ArrayType *BI = cast<ArrayType>(B);
- if (AI->getNumElements() != BI->getNumElements())
- return AI->getNumElements() < BI->getNumElements();
- return CompareTypes(AI->getElementType(), BI->getElementType());
- }
- if (const VectorType *AI = dyn_cast<VectorType>(A)) {
- const VectorType *BI = cast<VectorType>(B);
- if (AI->getNumElements() != BI->getNumElements())
- return AI->getNumElements() < BI->getNumElements();
- return CompareTypes(AI->getElementType(), BI->getElementType());
- }
- if (const StructType *AI = dyn_cast<StructType>(A)) {
- const StructType *BI = cast<StructType>(B);
- if (AI->getNumElements() != BI->getNumElements())
- return AI->getNumElements() < BI->getNumElements();
- for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i)
- if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) ||
- CompareTypes(BI->getElementType(i), AI->getElementType(i)))
- return CompareTypes(AI->getElementType(i), BI->getElementType(i));
- }
- return false;
-}
-
namespace {
/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
/// than the complexity of the RHS. This comparator is used to canonicalize
/// expressions.
class SCEVComplexityCompare {
- LoopInfo *LI;
+ const LoopInfo *const LI;
public:
- explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {}
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+ // Return true or false if LHS is less than, or at least RHS, respectively.
bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
+
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
if (LHS == RHS)
- return false;
+ return 0;
// Primarily, sort the SCEVs by their getSCEVType().
- if (LHS->getSCEVType() != RHS->getSCEVType())
- return LHS->getSCEVType() < RHS->getSCEVType();
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
// so that (a + b) and (b + a) don't end up as different expressions.
-
- // Sort SCEVUnknown values with some loose heuristics. TODO: This is
- // not as complete as it could be.
- if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS)) {
+ switch (LType) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
// Order pointer values after integer values. This helps SCEVExpander
// form GEPs.
- if (LU->getType()->isPointerTy() && !RU->getType()->isPointerTy())
- return false;
- if (RU->getType()->isPointerTy() && !LU->getType()->isPointerTy())
- return true;
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
// Compare getValueID values.
- if (LU->getValue()->getValueID() != RU->getValue()->getValueID())
- return LU->getValue()->getValueID() < RU->getValue()->getValueID();
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
// Sort arguments by their position.
- if (const Argument *LA = dyn_cast<Argument>(LU->getValue())) {
- const Argument *RA = cast<Argument>(RU->getValue());
- return LA->getArgNo() < RA->getArgNo();
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
}
- // For instructions, compare their loop depth, and their opcode.
- // This is pretty loose.
- if (Instruction *LV = dyn_cast<Instruction>(LU->getValue())) {
- Instruction *RV = cast<Instruction>(RU->getValue());
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
// Compare loop depths.
- if (LI->getLoopDepth(LV->getParent()) !=
- LI->getLoopDepth(RV->getParent()))
- return LI->getLoopDepth(LV->getParent()) <
- LI->getLoopDepth(RV->getParent());
-
- // Compare opcodes.
- if (LV->getOpcode() != RV->getOpcode())
- return LV->getOpcode() < RV->getOpcode();
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
// Compare the number of operands.
- if (LV->getNumOperands() != RV->getNumOperands())
- return LV->getNumOperands() < RV->getNumOperands();
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
}
- return false;
+ return 0;
}
- // Compare constant values.
- if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) {
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
const SCEVConstant *RC = cast<SCEVConstant>(RHS);
- if (LC->getValue()->getBitWidth() != RC->getValue()->getBitWidth())
- return LC->getValue()->getBitWidth() < RC->getValue()->getBitWidth();
- return LC->getValue()->getValue().ult(RC->getValue()->getValue());
+
+ // Compare constant values.
+ const APInt &LA = LC->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
}
- // Compare addrec loop depths.
- if (const SCEVAddRecExpr *LA = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
- if (LA->getLoop()->getLoopDepth() != RA->getLoop()->getLoopDepth())
- return LA->getLoop()->getLoopDepth() < RA->getLoop()->getLoopDepth();
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+
+ return 0;
}
- // Lexicographically compare n-ary expressions.
- if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) {
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
- for (unsigned i = 0, e = LC->getNumOperands(); i != e; ++i) {
- if (i >= RC->getNumOperands())
- return false;
- if (operator()(LC->getOperand(i), RC->getOperand(i)))
- return true;
- if (operator()(RC->getOperand(i), LC->getOperand(i)))
- return false;
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
+ if (X != 0)
+ return X;
}
- return LC->getNumOperands() < RC->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
}
- // Lexicographically compare udiv expressions.
- if (const SCEVUDivExpr *LC = dyn_cast<SCEVUDivExpr>(LHS)) {
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
- if (operator()(LC->getLHS(), RC->getLHS()))
- return true;
- if (operator()(RC->getLHS(), LC->getLHS()))
- return false;
- if (operator()(LC->getRHS(), RC->getRHS()))
- return true;
- if (operator()(RC->getRHS(), LC->getRHS()))
- return false;
- return false;
+
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
}
- // Compare cast expressions by operand.
- if (const SCEVCastExpr *LC = dyn_cast<SCEVCastExpr>(LHS)) {
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
- return operator()(LC->getOperand(), RC->getOperand());
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
+ }
+
+ default:
+ break;
}
llvm_unreachable("Unknown SCEV kind!");
- return false;
+ return 0;
}
};
}
@@ -628,8 +704,9 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
- if (SCEVComplexityCompare(LI)(Ops[1], Ops[0]))
- std::swap(Ops[0], Ops[1]);
+ const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+ if (SCEVComplexityCompare(LI)(RHS, LHS))
+ std::swap(LHS, RHS);
return;
}
@@ -845,6 +922,13 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return getAddRecExpr(Operands, AddRec->getLoop());
}
+ // As a special case, fold trunc(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
// The cast wasn't folded; create an explicit cast node. We can reuse
// the existing insert position since if we get here, we won't have
// made any changes which would invalidate it.
@@ -1163,6 +1247,13 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
return getAddRecExpr(Ops, AR->getLoop());
}
+ // As a special case, fold anyext(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
// If the expression is obviously signed, use the sext cast value.
if (isa<SCEVSMaxExpr>(Op))
return SExt;
@@ -1287,8 +1378,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (!isKnownNonNegative(Ops[i])) {
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
All = false;
break;
}
@@ -1321,22 +1413,29 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 1) return Ops[0];
}
- // Okay, check to see if the same value occurs in the operand list twice. If
- // so, merge them together into an multiply expression. Since we sorted the
- // list, these values are required to be adjacent.
+ // Okay, check to see if the same value occurs in the operand list more than
+ // once. If so, merge them together into an multiply expression. Since we
+ // sorted the list, these values are required to be adjacent.
const Type *Ty = Ops[0]->getType();
- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ bool FoundMatch = false;
+ for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
- // Found a match, merge the two values into a multiply, and add any
- // remaining values to the result.
- const SCEV *Two = getConstant(Ty, 2);
- const SCEV *Mul = getMulExpr(Ops[i], Two);
- if (Ops.size() == 2)
+ // Scan ahead to count how many equal operands there are.
+ unsigned Count = 2;
+ while (i+Count != e && Ops[i+Count] == Ops[i])
+ ++Count;
+ // Merge the values into a multiply.
+ const SCEV *Scale = getConstant(Ty, Count);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ if (Ops.size() == Count)
return Mul;
- Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
- Ops.push_back(Mul);
- return getAddExpr(Ops, HasNUW, HasNSW);
+ Ops[i] = Mul;
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+ --i; e -= Count - 1;
+ FoundMatch = true;
}
+ if (FoundMatch)
+ return getAddExpr(Ops, HasNUW, HasNSW);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
@@ -1433,7 +1532,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
- for (SmallVector<const SCEV *, 8>::iterator I = NewOps.begin(),
+ for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
E = NewOps.end(); I != E; ++I)
MulOpLists[M.find(*I)->second].push_back(*I);
// Re-generate the operands list.
@@ -1460,20 +1559,23 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ if (isa<SCEVConstant>(MulOpSCEV))
+ continue;
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
- if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) {
+ if (MulOpSCEV == Ops[AddOp]) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
- SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end());
- MulOps.erase(MulOps.begin()+MulOp);
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps);
}
const SCEV *One = getConstant(Ty, 1);
- const SCEV *AddOne = getAddExpr(InnerMul, One);
- const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]);
+ const SCEV *AddOne = getAddExpr(One, InnerMul);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
@@ -1500,15 +1602,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
- Mul->op_end());
- MulOps.erase(MulOps.begin()+MulOp);
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul1 = getMulExpr(MulOps);
}
const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
- OtherMul->op_end());
- MulOps.erase(MulOps.begin()+OMulOp);
+ OtherMul->op_begin()+OMulOp);
+ MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
InnerMul2 = getMulExpr(MulOps);
}
const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
@@ -1574,30 +1676,31 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// there are multiple AddRec's with the same loop induction variable being
// added together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
- OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
- if (OtherIdx != Idx) {
- const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (AddRecLoop == OtherAddRec->getLoop()) {
- // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D}
- SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(),
- AddRec->op_end());
- for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
- if (i >= NewOps.size()) {
- NewOps.append(OtherAddRec->op_begin()+i,
- OtherAddRec->op_end());
- break;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ AddRecOps[i] = getAddExpr(AddRecOps[i],
+ OtherAddRec->getOperand(i));
+ }
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
- NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
- }
- const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRecLoop);
-
- if (Ops.size() == 2) return NewAddRec;
-
- Ops.erase(Ops.begin()+Idx);
- Ops.erase(Ops.begin()+OtherIdx-1);
- Ops.push_back(NewAddRec);
- return getAddExpr(Ops);
- }
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop);
+ return getAddExpr(Ops);
}
// Otherwise couldn't fold anything into this recurrence. Move onto the
@@ -1633,17 +1736,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) ==
- getEffectiveSCEVType(Ops[0]->getType()) &&
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVMulExpr operand types don't match!");
#endif
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (!isKnownNonNegative(Ops[i])) {
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
All = false;
break;
}
@@ -1740,8 +1844,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
+ if (Ops[i]->isLoopInvariant(AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -1758,7 +1863,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer mul and the inner addrec are guaranteed to have no overflow.
- const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop,
HasNUW && AddRec->hasNoUnsignedWrap(),
HasNSW && AddRec->hasNoSignedWrap());
@@ -1778,28 +1883,30 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// there are multiple AddRec's with the same loop induction variable being
// multiplied together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
- OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
- if (OtherIdx != Idx) {
- const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (AddRec->getLoop() == OtherAddRec->getLoop()) {
- // F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D}
- const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
- const SCEV *NewStart = getMulExpr(F->getStart(),
- G->getStart());
- const SCEV *B = F->getStepRecurrence(*this);
- const SCEV *D = G->getStepRecurrence(*this);
- const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
- getMulExpr(G, B),
- getMulExpr(B, D));
- const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
- F->getLoop());
- if (Ops.size() == 2) return NewAddRec;
-
- Ops.erase(Ops.begin()+Idx);
- Ops.erase(Ops.begin()+OtherIdx-1);
- Ops.push_back(NewAddRec);
- return getMulExpr(Ops);
- }
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
+ // {A*C,+,F*D + G*B + B*D}<L>
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
+ const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
+ const SCEV *B = F->getStepRecurrence(*this);
+ const SCEV *D = G->getStepRecurrence(*this);
+ const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
+ getMulExpr(G, B),
+ getMulExpr(B, D));
+ const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
+ F->getLoop());
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ return getMulExpr(Ops);
}
// Otherwise couldn't fold anything into this recurrence. Move onto the
@@ -1848,7 +1955,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// TODO: Generalize this to non-constants by using known-bits information.
const Type *Ty = LHS->getType();
unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
- unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ;
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
if (!RHSC->getValue()->getValue().isPowerOf2())
@@ -1955,9 +2062,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
bool HasNUW, bool HasNSW) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Operands[i]->getType()) ==
- getEffectiveSCEVType(Operands[0]->getType()) &&
+ assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
#endif
@@ -1975,8 +2082,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- if (!isKnownNonNegative(Operands[i])) {
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
+ E = Operands.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
All = false;
break;
}
@@ -1986,9 +2094,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// Canonicalize nested AddRecs in by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop *NestedLoop = NestedAR->getLoop();
- if (L->contains(NestedLoop->getHeader()) ?
+ if (L->contains(NestedLoop) ?
(L->getLoopDepth() < NestedLoop->getLoopDepth()) :
- (!NestedLoop->contains(L->getHeader()) &&
+ (!NestedLoop->contains(L) &&
DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
NestedAR->op_end());
@@ -2055,9 +2163,9 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) ==
- getEffectiveSCEVType(Ops[0]->getType()) &&
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVSMaxExpr operand types don't match!");
#endif
@@ -2160,9 +2268,9 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) ==
- getEffectiveSCEVType(Ops[0]->getType()) &&
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVUMaxExpr operand types don't match!");
#endif
@@ -2326,8 +2434,14 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
ID.AddInteger(scUnknown);
ID.AddPointer(V);
void *IP = 0;
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V);
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ assert(cast<SCEVUnknown>(S)->getValue() == V &&
+ "Stale SCEVUnknown in uniquing map!");
+ return S;
+ }
+ SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+ FirstUnknown);
+ FirstUnknown = cast<SCEVUnknown>(S);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
@@ -2391,10 +2505,15 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
- std::map<SCEVCallbackVH, const SCEV *>::iterator I = Scalars.find(V);
- if (I != Scalars.end()) return I->second;
+ ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+ if (I != ValueExprMap.end()) return I->second;
const SCEV *S = createSCEV(V);
- Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+
+ // The process of creating a SCEV for V may have caused other SCEVs
+ // to have been created, so it's necessary to insert the new entry
+ // from scratch, rather than trying to remember the insert position
+ // above.
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
return S;
}
@@ -2428,6 +2547,10 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
///
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
const SCEV *RHS) {
+ // Fast path: X - X --> 0.
+ if (LHS == RHS)
+ return getConstant(LHS->getType(), 0);
+
// X - Y --> X + -Y
return getAddExpr(LHS, getNegativeSCEV(RHS));
}
@@ -2570,12 +2693,12 @@ PushDefUseChildren(Instruction *I,
// Push the def-use children onto the Worklist stack.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
- Worklist.push_back(cast<Instruction>(UI));
+ Worklist.push_back(cast<Instruction>(*UI));
}
/// ForgetSymbolicValue - This looks up computed SCEV values for all
/// instructions that depend on the given instruction and removes them from
-/// the Scalars map if they reference SymName. This is used during PHI
+/// the ValueExprMapType map if they reference SymName. This is used during PHI
/// resolution.
void
ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
@@ -2588,9 +2711,9 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
// Short-circuit the def-use traversal if the symbolic name
// ceases to appear in expressions.
if (It->second != SymName && !It->second->hasOperand(SymName))
@@ -2607,7 +2730,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
!isa<SCEVUnknown>(It->second) ||
(I != PN && It->second == SymName)) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
}
}
@@ -2644,9 +2767,9 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (BEValueV && StartValueV) {
// While we are analyzing this PHI node, handle its value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
- assert(Scalars.find(PN) == Scalars.end() &&
+ assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
"PHI node already processed?");
- Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
@@ -2707,7 +2830,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
ForgetSymbolicName(PN, SymbolicName);
- Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2732,7 +2855,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
ForgetSymbolicName(PN, SymbolicName);
- Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2777,7 +2900,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
return getUnknown(GEP);
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
- for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
+ for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
E = GEP->op_end();
I != E; ++I) {
Value *Index = *I;
@@ -3200,12 +3323,42 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
Operator *U = cast<Operator>(V);
switch (Opcode) {
- case Instruction::Add:
- return getAddExpr(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
- case Instruction::Mul:
- return getMulExpr(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
+ case Instruction::Add: {
+ // The simple thing to do would be to just call getSCEV on both operands
+ // and call getAddExpr with the result. However if we're looking at a
+ // bunch of things all added together, this can be quite inefficient,
+ // because it leads to N-1 getAddExpr calls for N ultimate operands.
+ // Instead, gather up all the operands and make a single getAddExpr call.
+ // LLVM IR canonical form means we need only traverse the left operands.
+ SmallVector<const SCEV *, 4> AddOps;
+ AddOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
+ unsigned Opcode = Op->getValueID() - Value::InstructionVal;
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ break;
+ U = cast<Operator>(Op);
+ const SCEV *Op1 = getSCEV(U->getOperand(1));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getNegativeSCEV(Op1));
+ else
+ AddOps.push_back(Op1);
+ }
+ AddOps.push_back(getSCEV(U->getOperand(0)));
+ return getAddExpr(AddOps);
+ }
+ case Instruction::Mul: {
+ // See the Add code above.
+ SmallVector<const SCEV *, 4> MulOps;
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0);
+ Op->getValueID() == Instruction::Mul + Value::InstructionVal;
+ Op = U->getOperand(0)) {
+ U = cast<Operator>(Op);
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ }
+ MulOps.push_back(getSCEV(U->getOperand(0)));
+ return getMulExpr(MulOps);
+ }
case Instruction::UDiv:
return getUDivExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
@@ -3467,7 +3620,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, One);
if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(LS, One), LDiff);
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
case ICmpInst::ICMP_EQ:
@@ -3482,7 +3635,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
const SCEV *LDiff = getMinusSCEV(LA, One);
const SCEV *RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(LS, One), LDiff);
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
default:
@@ -3579,9 +3732,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, or it's a PHI that's in the progress of being computed
// by createNodeForPHI. In the former case, additional loop trip
@@ -3590,7 +3743,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -3619,11 +3772,10 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -3648,35 +3800,14 @@ void ScalarEvolution::forgetValue(Value *V) {
I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
- // If there's a SCEVUnknown tying this value into the SCEV
- // space, remove it from the folding set map. The SCEVUnknown
- // object and any other SCEV objects which reference it
- // (transitively) remain allocated, effectively leaked until
- // the underlying BumpPtrAllocator is freed.
- //
- // This permits SCEV pointers to be used as keys in maps
- // such as the ValuesAtScopes map.
- FoldingSetNodeID ID;
- ID.AddInteger(scUnknown);
- ID.AddPointer(I);
- void *IP;
- if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
- UniqueSCEVs.RemoveNode(S);
-
- // This isn't necessary, but we might as well remove the
- // value from the ValuesAtScopes map too.
- ValuesAtScopes.erase(S);
- }
-
PushDefUseChildren(I, Worklist);
}
}
@@ -3816,14 +3947,13 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
else
MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
} else {
- // Both conditions must be true for the loop to exit.
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
- if (BTI0.Exact != getCouldNotCompute() &&
- BTI1.Exact != getCouldNotCompute())
- BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max != getCouldNotCompute() &&
- BTI1.Max != getCouldNotCompute())
- MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
}
return BackedgeTakenInfo(BECount, MaxBECount);
@@ -3851,14 +3981,13 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
else
MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
} else {
- // Both conditions must be false for the loop to exit.
+ // Both conditions must be false at the same time for the loop to exit.
+ // For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
- if (BTI0.Exact != getCouldNotCompute() &&
- BTI1.Exact != getCouldNotCompute())
- BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max != getCouldNotCompute() &&
- BTI1.Max != getCouldNotCompute())
- MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
}
return BackedgeTakenInfo(BECount, MaxBECount);
@@ -4203,7 +4332,7 @@ Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
- std::map<PHINode*, Constant*>::iterator I =
+ std::map<PHINode*, Constant*>::const_iterator I =
ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
@@ -5185,7 +5314,8 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
LoopContinuePredicate->isUnconditional())
return false;
- return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS,
+ return isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
LoopContinuePredicate->getSuccessor(0) != L->getHeader());
}
@@ -5214,7 +5344,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
LoopEntryPredicate->isUnconditional())
continue;
- if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+ if (isImpliedCond(Pred, LHS, RHS,
+ LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
@@ -5224,24 +5355,24 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
-bool ScalarEvolution::isImpliedCond(Value *CondValue,
- ICmpInst::Predicate Pred,
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
bool Inverse) {
// Recursively handle And and Or conditions.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
- return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
} else if (BO->getOpcode() == Instruction::Or) {
if (Inverse)
- return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
}
}
- ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
+ ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
// Bail if the ICmp's operands' types are wider than the needed type
@@ -5658,20 +5789,19 @@ void ScalarEvolution::SCEVCallbackVH::deleted() {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->Scalars.erase(getValPtr());
+ SE->ValueExprMap.erase(getValPtr());
// this now dangles!
}
-void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
+ Value *Old = getValPtr();
SmallVector<User *, 16> Worklist;
SmallPtrSet<User *, 8> Visited;
- Value *Old = getValPtr();
- bool DeleteOld = false;
for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
UI != UE; ++UI)
Worklist.push_back(*UI);
@@ -5679,27 +5809,22 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
User *U = Worklist.pop_back_val();
// Deleting the Old value will cause this to dangle. Postpone
// that until everything else is done.
- if (U == Old) {
- DeleteOld = true;
+ if (U == Old)
continue;
- }
if (!Visited.insert(U))
continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->Scalars.erase(U);
+ SE->ValueExprMap.erase(U);
for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
UI != UE; ++UI)
Worklist.push_back(*UI);
}
- // Delete the Old value if it (indirectly) references itself.
- if (DeleteOld) {
- if (PHINode *PN = dyn_cast<PHINode>(Old))
- SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->Scalars.erase(Old);
- // this now dangles!
- }
- // this may dangle!
+ // Delete the Old value.
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(Old);
+ // this now dangles!
}
ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
@@ -5710,7 +5835,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(&ID) {
+ : FunctionPass(ID), FirstUnknown(0) {
}
bool ScalarEvolution::runOnFunction(Function &F) {
@@ -5722,7 +5847,13 @@ bool ScalarEvolution::runOnFunction(Function &F) {
}
void ScalarEvolution::releaseMemory() {
- Scalars.clear();
+ // Iterate through all the SCEVUnknown instances and call their
+ // destructors, so that they release their references to their values.
+ for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
+ U->~SCEVUnknown();
+ FirstUnknown = 0;
+
+ ValueExprMap.clear();
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
ValuesAtScopes.clear();
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 58711b8be59e..93b2a8b06fbe 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -34,14 +34,14 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- ScalarEvolutionAliasAnalysis() : FunctionPass(&ID), SE(0) {}
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -58,11 +58,8 @@ namespace {
// Register this pass...
char ScalarEvolutionAliasAnalysis::ID = 0;
-static RegisterPass<ScalarEvolutionAliasAnalysis>
-X("scev-aa", "ScalarEvolution-based Alias Analysis", false, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false);
FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
return new ScalarEvolutionAliasAnalysis();
@@ -158,8 +155,8 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
Value *AO = GetBaseValue(AS);
Value *BO = GetBaseValue(BS);
if ((AO && AO != A) || (BO && BO != B))
- if (alias(AO ? AO : A, AO ? ~0u : ASize,
- BO ? BO : B, BO ? ~0u : BSize) == NoAlias)
+ if (alias(AO ? AO : A, AO ? UnknownSize : ASize,
+ BO ? BO : B, BO ? UnknownSize : BSize) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index d4a4b26e25ec..66a06aeac43c 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -647,6 +647,11 @@ public:
bool operator()(std::pair<const Loop *, const SCEV *> LHS,
std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the end.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
// Compare loops with PickMostRelevantLoop.
if (LHS.first != RHS.first)
return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
@@ -699,8 +704,15 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// The running sum expression is a pointer. Try to form a getelementptr
// at this level with that as the base.
SmallVector<const SCEV *, 4> NewOps;
- for (; I != E && I->first == CurLoop; ++I)
- NewOps.push_back(I->second);
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is SCEVUnknown and not instructions, peek through
+ // it, to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
} else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
// The running sum is an integer, and there's a pointer at this level.
@@ -1047,9 +1059,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// First check for an existing canonical IV in a suitable type.
PHINode *CanonicalIV = 0;
if (PHINode *PN = L->getCanonicalInductionVariable())
- if (SE.isSCEVable(PN->getType()) &&
- SE.getEffectiveSCEVType(PN->getType())->isIntegerTy() &&
- SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
CanonicalIV = PN;
// Rewrite an AddRec in terms of the canonical induction variable, if
@@ -1102,21 +1112,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
SE.getUnknown(expand(Rest))));
}
- // {0,+,1} --> Insert a canonical induction variable into the loop!
- if (S->isAffine() && S->getOperand(1)->isOne()) {
- // If there's a canonical IV, just use it.
- if (CanonicalIV) {
- assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
- "IVs with types different from the canonical IV should "
- "already have been handled!");
- return CanonicalIV;
- }
-
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
// Create and insert the PHI node for the induction variable in the
// specified loop.
BasicBlock *Header = L->getHeader();
- PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
- rememberInstruction(PN);
+ CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin());
+ rememberInstruction(CanonicalIV);
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
@@ -1125,40 +1127,45 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
// corresponding to the back-edge.
- Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
- HP->getTerminator());
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
rememberInstruction(Add);
- PN->addIncoming(Add, HP);
+ CanonicalIV->addIncoming(Add, HP);
} else {
- PN->addIncoming(Constant::getNullValue(Ty), HP);
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
}
}
}
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
// {0,+,F} --> {0,+,1} * F
- // Get the canonical induction variable I for this loop.
- Value *I = CanonicalIV ?
- CanonicalIV :
- getOrInsertCanonicalInductionVariable(L, Ty);
// If this is a simple linear addrec, emit it now as a special case.
if (S->isAffine()) // {0,+,F} --> i*F
return
expand(SE.getTruncateOrNoop(
- SE.getMulExpr(SE.getUnknown(I),
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
SE.getNoopOrAnyExtend(S->getOperand(1),
- I->getType())),
+ CanonicalIV->getType())),
Ty));
// If this is a chain of recurrences, turn it into a closed form, using the
// folders, then expandCodeFor the closed form. This allows the folders to
// simplify the expression without having to build a bunch of special code
// into this folder.
- const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
// Promote S up to the canonical IV type, if the cast is foldable.
const SCEV *NewS = S;
- const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType());
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
if (isa<SCEVAddRecExpr>(Ext))
NewS = Ext;
@@ -1337,16 +1344,21 @@ void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
/// starts at zero and steps by one on each iteration.
-Value *
+PHINode *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
const Type *Ty) {
assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
SE.getConstant(Ty, 1), L);
+
+ // Emit code for it.
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
- Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
+ PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
if (SaveInsertBB)
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return V;
}
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 563fd2fa96e2..ac36cef89ebb 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
/// post-inc value when we cannot) or it can end up adding extra live-ranges to
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
const Loop *L, DominatorTree *DT) {
// If the user is in the loop, use the preinc value.
if (L->contains(User)) return false;
@@ -45,20 +45,17 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
// their uses occur in the predecessor block, not the block the PHI lives in)
// should still use the post-inc value. Check for this case now.
PHINode *PN = dyn_cast<PHINode>(User);
- if (!PN) return false; // not a phi, not dominated by latch block.
+ if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
- // Look at all of the uses of IV by the PHI node. If any use corresponds to
- // a block that is not dominated by the latch block, give up and use the
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
// preincremented value.
- unsigned NumUses = 0;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == IV) {
- ++NumUses;
- if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
- return false;
- }
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
- // Okay, all uses of IV by PN are in predecessor blocks that really are
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
// dominated by the latch block. Use the post-incremented value.
return true;
}
@@ -72,6 +69,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
DominatorTree &DT) {
if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
return S;
+
if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
const SCEV *O = X->getOperand();
const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
@@ -85,9 +83,69 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
}
return S;
}
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ SmallVector<const SCEV *, 8> Operands;
+ const Loop *L = AR->getLoop();
+ // The addrec conceptually uses its operands at loop entry.
+ Instruction *LUser = L->getHeader()->begin();
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
+ Operands.push_back(N);
+ }
+ const SCEV *Result = SE.getAddRecExpr(Operands, L);
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected transform name!");
+ case NormalizeAutodetect:
+ if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ Loops.insert(L);
+ }
+#if 0
+ // This assert is conceptually correct, but ScalarEvolution currently
+ // sometimes fails to canonicalize two equal SCEVs to exactly the same
+ // form. It's possibly a pessimization when this happens, but it isn't a
+ // correctness problem, so disable this assert for now.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Normalize:
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ }
+#if 0
+ // See the comment on the assert above.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Denormalize:
+ if (Loops.count(L))
+ Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+ break;
+ }
+ return Result;
+ }
+
if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
SmallVector<const SCEV *, 8> Operands;
bool Changed = false;
+ // Transform each operand.
for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
I != E; ++I) {
const SCEV *O = *I;
@@ -96,37 +154,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
Changed |= N != O;
Operands.push_back(N);
}
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // An addrec. This is the interesting part.
- const Loop *L = AR->getLoop();
- const SCEV *Result = SE.getAddRecExpr(Operands, L);
- switch (Kind) {
- default: llvm_unreachable("Unexpected transform name!");
- case NormalizeAutodetect:
- if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace))
- if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
- const SCEV *TransformedStep =
- TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
- User, OperandValToReplace, Loops, SE, DT);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- Loops.insert(L);
- }
- break;
- case Normalize:
- if (Loops.count(L)) {
- const SCEV *TransformedStep =
- TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
- User, OperandValToReplace, Loops, SE, DT);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- }
- break;
- case Denormalize:
- if (Loops.count(L))
- Result = SE.getAddExpr(Result, AR->getStepRecurrence(SE));
- break;
- }
- return Result;
- }
+ // If any operand actually changed, return a transformed result.
if (Changed)
switch (S->getSCEVType()) {
case scAddExpr: return SE.getAddExpr(Operands);
@@ -137,6 +165,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
}
return S;
}
+
if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
const SCEV *LO = X->getLHS();
const SCEV *RO = X->getRHS();
@@ -148,6 +177,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
return SE.getUDivExpr(LN, RN);
return S;
}
+
llvm_unreachable("Unexpected SCEV kind!");
return 0;
}
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 000000000000..bbfdcec3f9b4
--- /dev/null
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,191 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language.
+//
+// This pass is language-independent. The type system is encoded in
+// metadata. This allows this pass to support typical C and C++ TBAA, but
+// it can also support custom aliasing behavior for other languages.
+//
+// This is a work-in-progress. It doesn't work yet, and the metadata
+// format isn't stable.
+//
+// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to
+// be extended.
+// TODO: AA chaining
+// TODO: struct fields
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ /// TBAANode - This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAANode {
+ const MDNode *Node;
+
+ public:
+ TBAANode() : Node(0) {}
+ explicit TBAANode(MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias DAG parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
+
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ // TODO: Think about the encoding.
+ return CI->isOne();
+ }
+ };
+}
+
+namespace {
+ /// TypeBasedAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses TypeBased to answer queries.
+ class TypeBasedAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {}
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+ virtual bool pointsToConstantMemory(const Value *P);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char TypeBasedAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
+ "Type-Based Alias Analysis", false, true, false);
+
+ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
+ return new TypeBasedAliasAnalysis();
+}
+
+void
+TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
+ const Value *B, unsigned BSize) {
+ // Currently, metadata can only be attached to Instructions.
+ const Instruction *AI = dyn_cast<Instruction>(A);
+ if (!AI) return MayAlias;
+ const Instruction *BI = dyn_cast<Instruction>(B);
+ if (!BI) return MayAlias;
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ MDNode *AM =
+ AI->getMetadata(AI->getParent()->getParent()->getParent()
+ ->getMDKindID("tbaa"));
+ if (!AM) return MayAlias;
+ MDNode *BM =
+ BI->getMetadata(BI->getParent()->getParent()->getParent()
+ ->getMDKindID("tbaa"));
+ if (!BM) return MayAlias;
+
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the DAG from A to see if we reach B.
+ for (TBAANode T(AM); ; ) {
+ if (T.getNode() == BM)
+ // B is an ancestor of A.
+ return MayAlias;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the DAG from B to see if we reach A.
+ for (TBAANode T(BM); ; ) {
+ if (T.getNode() == AM)
+ // A is an ancestor of B.
+ return MayAlias;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have the same root, then we've proved there's no alias.
+ if (RootA.getNode() == RootB.getNode())
+ return NoAlias;
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ return MayAlias;
+}
+
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Value *P) {
+ // Currently, metadata can only be attached to Instructions.
+ const Instruction *I = dyn_cast<Instruction>(P);
+ if (!I) return false;
+
+ MDNode *M =
+ I->getMetadata(I->getParent()->getParent()->getParent()
+ ->getMDKindID("tbaa"));
+ if (!M) return false;
+
+ // If this is an "immutable" type, we can assume the pointer is pointing
+ // to constant memory.
+ return TBAANode(M).TypeIsImmutable();
+}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b4c9884a20ed..181c9b01980c 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -880,19 +880,20 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
}
Value *Mul0 = NULL;
- Value *Mul1 = NULL;
- bool M0 = ComputeMultiple(Op0, Base, Mul0,
- LookThroughSExt, Depth+1);
- bool M1 = ComputeMultiple(Op1, Base, Mul1,
- LookThroughSExt, Depth+1);
-
- if (M0) {
- if (isa<Constant>(Op1) && isa<Constant>(Mul0)) {
- // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
- Multiple = ConstantExpr::getMul(cast<Constant>(Mul0),
- cast<Constant>(Op1));
- return true;
- }
+ if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1))
+ if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(MulC, Op1C);
+ return true;
+ }
if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
if (Mul0CI->getValue() == 1) {
@@ -902,13 +903,21 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
}
}
- if (M1) {
- if (isa<Constant>(Op0) && isa<Constant>(Mul1)) {
- // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
- Multiple = ConstantExpr::getMul(cast<Constant>(Mul1),
- cast<Constant>(Op0));
- return true;
- }
+ Value *Mul1 = NULL;
+ if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+ if (Constant *Op0C = dyn_cast<Constant>(Op0))
+ if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(MulC, Op0C);
+ return true;
+ }
if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
if (Mul1CI->getValue() == 1) {
@@ -973,195 +982,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers. Return the scale and offset
-/// values as APInts and return V as a Value*. The incoming Value is known to
-/// have IntegerType. Note that this looks through extends, so the high bits
-/// may not be represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
- const TargetData *TD, unsigned Depth) {
- assert(V->getType()->isIntegerTy() && "Not an integer value");
-
- // Limit our recursion depth.
- if (Depth == 6) {
- Scale = 1;
- Offset = 0;
- return V;
- }
-
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
- switch (BOp->getOpcode()) {
- default: break;
- case Instruction::Or:
- // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
- // analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD))
- break;
- // FALL THROUGH.
- case Instruction::Add:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
- Offset += RHSC->getValue();
- return V;
- case Instruction::Mul:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
- Offset *= RHSC->getValue();
- Scale *= RHSC->getValue();
- return V;
- case Instruction::Shl:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
- Offset <<= RHSC->getValue().getLimitedValue();
- Scale <<= RHSC->getValue().getLimitedValue();
- return V;
- }
- }
- }
-
- // Since clients don't care about the high bits of the value, just scales and
- // offsets, we can look through extensions.
- if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
- Value *CastOp = cast<CastInst>(V)->getOperand(0);
- unsigned OldWidth = Scale.getBitWidth();
- unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
- Scale.trunc(SmallWidth);
- Offset.trunc(SmallWidth);
- Value *Result = GetLinearExpression(CastOp, Scale, Offset, TD, Depth+1);
- Scale.zext(OldWidth);
- Offset.zext(OldWidth);
- return Result;
- }
-
- Scale = 1;
- Offset = 0;
- return V;
-}
-
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
-///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits. As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
-///
-/// When TargetData is around, this function is capable of analyzing everything
-/// that Value::getUnderlyingObject() can look through. When not, it just looks
-/// through pointer casts.
-///
-const Value *llvm::DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
- SmallVectorImpl<std::pair<const Value*, int64_t> > &VarIndices,
- const TargetData *TD) {
- // Limit recursion depth to limit compile time in crazy cases.
- unsigned MaxLookup = 6;
-
- BaseOffs = 0;
- do {
- // See if this is a bitcast or GEP.
- const Operator *Op = dyn_cast<Operator>(V);
- if (Op == 0) {
- // The only non-operator case we can handle are GlobalAliases.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (!GA->mayBeOverridden()) {
- V = GA->getAliasee();
- continue;
- }
- }
- return V;
- }
-
- if (Op->getOpcode() == Instruction::BitCast) {
- V = Op->getOperand(0);
- continue;
- }
-
- const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
- if (GEPOp == 0)
- return V;
-
- // Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
- ->getElementType()->isSized())
- return V;
-
- // If we are lacking TargetData information, we can't compute the offets of
- // elements computed by GEPs. However, we can handle bitcast equivalent
- // GEPs.
- if (!TD) {
- if (!GEPOp->hasAllZeroIndices())
- return V;
- V = GEPOp->getOperand(0);
- continue;
- }
-
- // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
- gep_type_iterator GTI = gep_type_begin(GEPOp);
- for (User::const_op_iterator I = GEPOp->op_begin()+1,
- E = GEPOp->op_end(); I != E; ++I) {
- Value *Index = *I;
- // Compute the (potentially symbolic) offset in bytes for this index.
- if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
- // For a struct, add the member offset.
- unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- if (FieldNo == 0) continue;
-
- BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
- continue;
- }
-
- // For an array/pointer, add the element offset, explicitly scaled.
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
- if (CIdx->isZero()) continue;
- BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
- continue;
- }
-
- uint64_t Scale = TD->getTypeAllocSize(*GTI);
-
- // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
- unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- APInt IndexScale(Width, 0), IndexOffset(Width, 0);
- Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD, 0);
-
- // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
- // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- BaseOffs += IndexOffset.getZExtValue()*Scale;
- Scale *= IndexScale.getZExtValue();
-
-
- // If we already had an occurrance of this index variable, merge this
- // scale into it. For example, we want to handle:
- // A[x][x] -> x*16 + x*4 -> x*20
- // This also ensures that 'x' only appears in the index list once.
- for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
- if (VarIndices[i].first == Index) {
- Scale += VarIndices[i].second;
- VarIndices.erase(VarIndices.begin()+i);
- break;
- }
- }
-
- // Make sure that we have a scale that makes sense for this target's
- // pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
- Scale <<= ShiftBits;
- Scale >>= ShiftBits;
- }
-
- if (Scale)
- VarIndices.push_back(std::make_pair(Index, Scale));
- }
-
- // Analyze the base pointer next.
- V = GEPOp->getOperand(0);
- } while (--MaxLookup);
-
- // If the chain of expressions is too deep, just return early.
- return V;
-}
-
-
// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index f4c0e50fd94d..032753a3b2c6 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -493,6 +493,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(private);
KEYWORD(linker_private);
KEYWORD(linker_private_weak);
+ KEYWORD(linker_private_weak_def_auto);
KEYWORD(internal);
KEYWORD(available_externally);
KEYWORD(linkonce);
@@ -572,7 +573,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(type);
KEYWORD(opaque);
- KEYWORD(union);
KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 221b994db55f..f21a065473b6 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -199,6 +199,7 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_private: // OptionalLinkage
case lltok::kw_linker_private: // OptionalLinkage
case lltok::kw_linker_private_weak: // OptionalLinkage
+ case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage
case lltok::kw_internal: // OptionalLinkage
case lltok::kw_weak: // OptionalLinkage
case lltok::kw_weak_odr: // OptionalLinkage
@@ -517,11 +518,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
if (Result) return false;
// Otherwise, create MDNode forward reference.
-
- // FIXME: This is not unique enough!
- std::string FwdRefName = "llvm.mdnode.fwdref." + utostr(MID);
- Value *V = MDString::get(Context, FwdRefName);
- MDNode *FwdNode = MDNode::get(Context, &V, 1);
+ MDNode *FwdNode = MDNode::getTemporary(Context, 0, 0);
ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
if (NumberedMetadata.size() <= MID)
@@ -543,27 +540,20 @@ bool LLParser::ParseNamedMetadata() {
ParseToken(lltok::lbrace, "Expected '{' here"))
return true;
- SmallVector<MDNode *, 8> Elts;
+ NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name);
if (Lex.getKind() != lltok::rbrace)
do {
- // Null is a special case since it is typeless.
- if (EatIfPresent(lltok::kw_null)) {
- Elts.push_back(0);
- continue;
- }
-
if (ParseToken(lltok::exclaim, "Expected '!' here"))
return true;
MDNode *N = 0;
if (ParseMDNodeID(N)) return true;
- Elts.push_back(N);
+ NMD->addOperand(N);
} while (EatIfPresent(lltok::comma));
if (ParseToken(lltok::rbrace, "expected end of metadata node"))
return true;
- NamedMDNode::Create(Context, Name, Elts.data(), Elts.size(), M);
return false;
}
@@ -592,7 +582,9 @@ bool LLParser::ParseStandaloneMetadata() {
std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
FI = ForwardRefMDNodes.find(MetadataID);
if (FI != ForwardRefMDNodes.end()) {
- FI->second.first->replaceAllUsesWith(Init);
+ MDNode *Temp = FI->second.first;
+ Temp->replaceAllUsesWith(Init);
+ MDNode::deleteTemporary(Temp);
ForwardRefMDNodes.erase(FI);
assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
@@ -632,7 +624,8 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
Linkage != GlobalValue::InternalLinkage &&
Linkage != GlobalValue::PrivateLinkage &&
Linkage != GlobalValue::LinkerPrivateLinkage &&
- Linkage != GlobalValue::LinkerPrivateWeakLinkage)
+ Linkage != GlobalValue::LinkerPrivateWeakLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage)
return Error(LinkageLoc, "invalid linkage type for alias");
Constant *Aliasee;
@@ -1017,6 +1010,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
/// ::= 'private'
/// ::= 'linker_private'
/// ::= 'linker_private_weak'
+/// ::= 'linker_private_weak_def_auto'
/// ::= 'internal'
/// ::= 'weak'
/// ::= 'weak_odr'
@@ -1038,6 +1032,9 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
case lltok::kw_linker_private_weak:
Res = GlobalValue::LinkerPrivateWeakLinkage;
break;
+ case lltok::kw_linker_private_weak_def_auto:
+ Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+ break;
case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
@@ -1120,29 +1117,44 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
/// ParseInstructionMetadata
/// ::= !dbg !42 (',' !dbg !57)*
-bool LLParser::ParseInstructionMetadata(Instruction *Inst) {
+bool LLParser::ParseInstructionMetadata(Instruction *Inst,
+ PerFunctionState *PFS) {
do {
if (Lex.getKind() != lltok::MetadataVar)
return TokError("expected metadata after comma");
std::string Name = Lex.getStrVal();
+ unsigned MDK = M->getMDKindID(Name.c_str());
Lex.Lex();
MDNode *Node;
unsigned NodeID;
SMLoc Loc = Lex.getLoc();
- if (ParseToken(lltok::exclaim, "expected '!' here") ||
- ParseMDNodeID(Node, NodeID))
+
+ if (ParseToken(lltok::exclaim, "expected '!' here"))
return true;
- unsigned MDK = M->getMDKindID(Name.c_str());
- if (Node) {
- // If we got the node, add it to the instruction.
- Inst->setMetadata(MDK, Node);
+ // This code is similar to that of ParseMetadataValue, however it needs to
+ // have special-case code for a forward reference; see the comments on
+ // ForwardRefInstMetadata for details. Also, MDStrings are not supported
+ // at the top level here.
+ if (Lex.getKind() == lltok::lbrace) {
+ ValID ID;
+ if (ParseMetadataListValue(ID, PFS))
+ return true;
+ assert(ID.Kind == ValID::t_MDNode);
+ Inst->setMetadata(MDK, ID.MDNodeVal);
} else {
- MDRef R = { Loc, MDK, NodeID };
- // Otherwise, remember that this should be resolved later.
- ForwardRefInstMetadata[Inst].push_back(R);
+ if (ParseMDNodeID(Node, NodeID))
+ return true;
+ if (Node) {
+ // If we got the node, add it to the instruction.
+ Inst->setMetadata(MDK, Node);
+ } else {
+ MDRef R = { Loc, MDK, NodeID };
+ // Otherwise, remember that this should be resolved later.
+ ForwardRefInstMetadata[Inst].push_back(R);
+ }
}
// If this is the end of the list, we're done.
@@ -1161,6 +1173,8 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
if (ParseUInt32(Alignment)) return true;
if (!isPowerOf2_32(Alignment))
return Error(AlignLoc, "alignment is not a power of two");
+ if (Alignment > Value::MaximumAlignment)
+ return Error(AlignLoc, "huge alignments are not supported yet");
return false;
}
@@ -1183,6 +1197,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
if (Lex.getKind() != lltok::kw_align)
return Error(Lex.getLoc(), "expected metadata or 'align'");
+ LocTy AlignLoc = Lex.getLoc();
if (ParseOptionalAlignment(Alignment)) return true;
}
@@ -1344,11 +1359,6 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
if (ParseStructType(Result, false))
return true;
break;
- case lltok::kw_union:
- // TypeRec ::= 'union' '{' ... '}'
- if (ParseUnionType(Result))
- return true;
- break;
case lltok::lsquare:
// TypeRec ::= '[' ... ']'
Lex.Lex(); // eat the lsquare.
@@ -1658,38 +1668,6 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
return false;
}
-/// ParseUnionType
-/// TypeRec
-/// ::= 'union' '{' TypeRec (',' TypeRec)* '}'
-bool LLParser::ParseUnionType(PATypeHolder &Result) {
- assert(Lex.getKind() == lltok::kw_union);
- Lex.Lex(); // Consume the 'union'
-
- if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true;
-
- SmallVector<PATypeHolder, 8> ParamsList;
- do {
- LocTy EltTyLoc = Lex.getLoc();
- if (ParseTypeRec(Result)) return true;
- ParamsList.push_back(Result);
-
- if (Result->isVoidTy())
- return Error(EltTyLoc, "union element can not have void type");
- if (!UnionType::isValidElementType(Result))
- return Error(EltTyLoc, "invalid element type for union");
-
- } while (EatIfPresent(lltok::comma)) ;
-
- if (ParseToken(lltok::rbrace, "expected '}' at end of union"))
- return true;
-
- SmallVector<const Type*, 8> ParamsListTy;
- for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
- ParamsListTy.push_back(ParamsList[i].get());
- Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size()));
- return false;
-}
-
/// ParseArrayVectorType - Parse an array or vector type, assuming the first
/// token has already been consumed.
/// TypeRec
@@ -2504,6 +2482,20 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
return false;
}
+bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex();
+
+ SmallVector<Value*, 16> Elts;
+ if (ParseMDNodeVector(Elts, PFS) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
+ ID.Kind = ValID::t_MDNode;
+ return false;
+}
+
/// ParseMetadataValue
/// ::= !42
/// ::= !{...}
@@ -2514,16 +2506,8 @@ bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
// MDNode:
// !{ ... }
- if (EatIfPresent(lltok::lbrace)) {
- SmallVector<Value*, 16> Elts;
- if (ParseMDNodeVector(Elts, PFS) ||
- ParseToken(lltok::rbrace, "expected end of metadata node"))
- return true;
-
- ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
- ID.Kind = ValID::t_MDNode;
- return false;
- }
+ if (Lex.getKind() == lltok::lbrace)
+ return ParseMetadataListValue(ID, PFS);
// Standalone metadata reference
// !42
@@ -2635,16 +2619,8 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
V = Constant::getNullValue(Ty);
return false;
case ValID::t_Constant:
- if (ID.ConstantVal->getType() != Ty) {
- // Allow a constant struct with a single member to be converted
- // to a union, if the union has a member which is the same type
- // as the struct member.
- if (const UnionType* utype = dyn_cast<UnionType>(Ty)) {
- return ParseUnionValue(utype, ID, V);
- }
-
+ if (ID.ConstantVal->getType() != Ty)
return Error(ID.Loc, "constant expression type mismatch");
- }
V = ID.ConstantVal;
return false;
@@ -2675,22 +2651,6 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
return false;
}
-bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) {
- if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) {
- if (stype->getNumContainedTypes() != 1)
- return Error(ID.Loc, "constant expression type mismatch");
- int index = utype->getElementTypeIndex(stype->getContainedType(0));
- if (index < 0)
- return Error(ID.Loc, "initializer type is not a member of the union");
-
- V = ConstantUnion::get(
- utype, cast<Constant>(ID.ConstantVal->getOperand(0)));
- return false;
- }
-
- return Error(ID.Loc, "constant expression type mismatch");
-}
-
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
@@ -2724,6 +2684,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
case GlobalValue::PrivateLinkage:
case GlobalValue::LinkerPrivateLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::LinkOnceAnyLinkage:
@@ -2980,7 +2941,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// With a normal result, we check to see if the instruction is followed by
// a comma and metadata.
if (EatIfPresent(lltok::comma))
- if (ParseInstructionMetadata(Inst))
+ if (ParseInstructionMetadata(Inst, &PFS))
return true;
break;
case InstExtraComma:
@@ -2988,7 +2949,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// If the instruction parser ate an extra comma at the end of it, it
// *must* be followed by metadata.
- if (ParseInstructionMetadata(Inst))
+ if (ParseInstructionMetadata(Inst, &PFS))
return true;
break;
}
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index f765a2ae4e6c..404cec3ed7c7 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -32,7 +32,6 @@ namespace llvm {
class GlobalValue;
class MDString;
class MDNode;
- class UnionType;
/// ValID - Represents a reference of a definition of some sort with no type.
/// There are several cases where we have to parse the value but where the
@@ -80,6 +79,14 @@ namespace llvm {
// Instruction metadata resolution. Each instruction can have a list of
// MDRef info associated with them.
+ //
+ // The simpler approach of just creating temporary MDNodes and then calling
+ // RAUW on them when the definition is processed doesn't work because some
+ // instruction metadata kinds, such as dbg, get stored in the IR in an
+ // "optimized" format which doesn't participate in the normal value use
+ // lists. This means that RAUW doesn't work, even on temporary MDNodes
+ // which otherwise support RAUW. Instead, we defer resolving MDNode
+ // references until the definitions have been processed.
struct MDRef {
SMLoc Loc;
unsigned MDKind, MDSlot;
@@ -180,7 +187,6 @@ namespace llvm {
bool ParseOptionalCallingConv(CallingConv::ID &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
bool ParseOptionalStackAlignment(unsigned &Alignment);
- bool ParseInstructionMetadata(Instruction *Inst);
bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
@@ -222,7 +228,6 @@ namespace llvm {
}
bool ParseTypeRec(PATypeHolder &H);
bool ParseStructType(PATypeHolder &H, bool Packed);
- bool ParseUnionType(PATypeHolder &H);
bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
bool ParseFunctionType(PATypeHolder &Result);
PATypeHolder HandleUpRefs(const Type *Ty);
@@ -291,7 +296,6 @@ namespace llvm {
return ParseTypeAndBasicBlock(BB, Loc, PFS);
}
- bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V);
struct ParamInfo {
LocTy Loc;
@@ -308,8 +312,10 @@ namespace llvm {
bool ParseGlobalValue(const Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
+ bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
// Function Parsing.
struct ArgInfo {
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 2703134ec1a9..61f93a427498 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -37,7 +37,8 @@ namespace lltok {
kw_declare, kw_define,
kw_global, kw_constant,
- kw_private, kw_linker_private, kw_linker_private_weak, kw_internal,
+ kw_private, kw_linker_private, kw_linker_private_weak,
+ kw_linker_private_weak_def_auto, kw_internal,
kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
@@ -97,7 +98,6 @@ namespace lltok {
kw_type,
kw_opaque,
- kw_union,
kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index e511cbe29c75..e7cef9b5c3c5 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -45,8 +45,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
if (F == 0) {
Err = SMDiagnostic(Filename,
- "Could not open input file '" + Filename + "': " +
- ErrorStr);
+ "Could not open input file: " + ErrorStr);
return 0;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index b3f0776d29d5..830c79aa3b54 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -39,6 +39,7 @@ void BitcodeReader::FreeState() {
std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
DeferredFunctionInfo.clear();
+ MDKindMap.clear();
}
//===----------------------------------------------------------------------===//
@@ -76,6 +77,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
case 12: return GlobalValue::AvailableExternallyLinkage;
case 13: return GlobalValue::LinkerPrivateLinkage;
case 14: return GlobalValue::LinkerPrivateWeakLinkage;
+ case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
}
}
@@ -295,8 +297,6 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
} else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
UserCS->getType()->isPacked());
- } else if (ConstantUnion *UserCU = dyn_cast<ConstantUnion>(UserC)) {
- NewC = ConstantUnion::get(UserCU->getType(), NewOps[0]);
} else if (isa<ConstantVector>(UserC)) {
NewC = ConstantVector::get(&NewOps[0], NewOps.size());
} else {
@@ -332,9 +332,9 @@ void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) {
}
// If there was a forward reference to this value, replace it.
- Value *PrevVal = OldV;
+ MDNode *PrevVal = cast<MDNode>(OldV);
OldV->replaceAllUsesWith(V);
- delete PrevVal;
+ MDNode::deleteTemporary(PrevVal);
// Deleting PrevVal sets Idx value in MDValuePtrs to null. Set new
// value for Idx.
MDValuePtrs[Idx] = V;
@@ -350,7 +350,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
}
// Create and return a placeholder, which will later be RAUW'd.
- Value *V = new Argument(Type::getMetadataTy(Context));
+ Value *V = MDNode::getTemporary(Context, 0, 0);
MDValuePtrs[Idx] = V;
return V;
}
@@ -589,13 +589,6 @@ bool BitcodeReader::ParseTypeTable() {
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
- case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N]
- SmallVector<const Type*, 8> EltTys;
- for (unsigned i = 0, e = Record.size(); i != e; ++i)
- EltTys.push_back(getTypeByID(Record[i], true));
- ResultTy = UnionType::get(&EltTys[0], EltTys.size());
- break;
- }
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid ARRAY type record");
@@ -781,7 +774,8 @@ bool BitcodeReader::ParseMetadata() {
bool IsFunctionLocal = false;
// Read a record.
Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
+ Code = Stream.ReadRecord(Code, Record);
+ switch (Code) {
default: // Default behavior: ignore.
break;
case bitc::METADATA_NAME: {
@@ -794,34 +788,46 @@ bool BitcodeReader::ParseMetadata() {
Record.clear();
Code = Stream.ReadCode();
- // METADATA_NAME is always followed by METADATA_NAMED_NODE.
- if (Stream.ReadRecord(Code, Record) != bitc::METADATA_NAMED_NODE)
+ // METADATA_NAME is always followed by METADATA_NAMED_NODE2.
+ // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0.
+ unsigned NextBitCode = Stream.ReadRecord(Code, Record);
+ if (NextBitCode == bitc::METADATA_NAMED_NODE) {
+ LLVM2_7MetadataDetected = true;
+ } else if (NextBitCode != bitc::METADATA_NAMED_NODE2)
assert ( 0 && "Inavlid Named Metadata record");
// Read named metadata elements.
unsigned Size = Record.size();
- SmallVector<MDNode *, 8> Elts;
+ NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
for (unsigned i = 0; i != Size; ++i) {
- if (Record[i] == ~0U) {
- Elts.push_back(NULL);
- continue;
- }
MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
if (MD == 0)
return Error("Malformed metadata record");
- Elts.push_back(MD);
+ NMD->addOperand(MD);
}
- Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(),
- Elts.size(), TheModule);
- MDValueList.AssignValue(V, NextMDValueNo++);
+ // Backwards compatibility hack: NamedMDValues used to be Values,
+ // and they got their own slots in the value numbering. They are no
+ // longer Values, however we still need to account for them in the
+ // numbering in order to be able to read old bitcode files.
+ // FIXME: Remove this in LLVM 3.0.
+ if (LLVM2_7MetadataDetected)
+ MDValueList.AssignValue(0, NextMDValueNo++);
break;
}
- case bitc::METADATA_FN_NODE:
+ case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0.
+ case bitc::METADATA_FN_NODE2:
IsFunctionLocal = true;
// fall-through
- case bitc::METADATA_NODE: {
+ case bitc::METADATA_NODE: // FIXME: Remove in LLVM 3.0.
+ case bitc::METADATA_NODE2: {
+
+ // Detect 2.7-era metadata.
+ // FIXME: Remove in LLVM 3.0.
+ if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE)
+ LLVM2_7MetadataDetected = true;
+
if (Record.size() % 2 == 1)
- return Error("Invalid METADATA_NODE record");
+ return Error("Invalid METADATA_NODE2 record");
unsigned Size = Record.size();
SmallVector<Value*, 8> Elts;
@@ -859,13 +865,12 @@ bool BitcodeReader::ParseMetadata() {
SmallString<8> Name;
Name.resize(RecordLength-1);
unsigned Kind = Record[0];
- (void) Kind;
for (unsigned i = 1; i != RecordLength; ++i)
Name[i-1] = Record[i];
unsigned NewKind = TheModule->getMDKindID(Name.str());
- assert(Kind == NewKind &&
- "FIXME: Unable to handle custom metadata mismatch!");(void)NewKind;
+ if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
+ return Error("Conflicting METADATA_KIND records");
break;
}
}
@@ -1020,11 +1025,6 @@ bool BitcodeReader::ParseConstants() {
Elts.push_back(ValueList.getConstantFwdRef(Record[i],
STy->getElementType(i)));
V = ConstantStruct::get(STy, Elts);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(CurTy)) {
- uint64_t Index = Record[0];
- Constant *Val = ValueList.getConstantFwdRef(Record[1],
- UnTy->getElementType(Index));
- V = ConstantUnion::get(UnTy, Val);
} else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
const Type *EltTy = ATy->getElementType();
for (unsigned i = 0; i != Size; ++i)
@@ -1297,6 +1297,12 @@ bool BitcodeReader::ParseModule() {
UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
}
+ // Look for global variables which need to be renamed.
+ for (Module::global_iterator
+ GI = TheModule->global_begin(), GE = TheModule->global_end();
+ GI != GE; ++GI)
+ UpgradeGlobalVariable(GI);
+
// Force deallocation of memory for these vectors to favor the client that
// want lazy deserialization.
std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
@@ -1614,15 +1620,22 @@ bool BitcodeReader::ParseMetadataAttachment() {
switch (Stream.ReadRecord(Code, Record)) {
default: // Default behavior: ignore.
break;
- case bitc::METADATA_ATTACHMENT: {
+ // FIXME: Remove in LLVM 3.0.
+ case bitc::METADATA_ATTACHMENT:
+ LLVM2_7MetadataDetected = true;
+ case bitc::METADATA_ATTACHMENT2: {
unsigned RecordLength = Record.size();
if (Record.empty() || (RecordLength - 1) % 2 == 1)
return Error ("Invalid METADATA_ATTACHMENT reader!");
Instruction *Inst = InstructionList[Record[0]];
for (unsigned i = 1; i != RecordLength; i = i+2) {
unsigned Kind = Record[i];
+ DenseMap<unsigned, unsigned>::iterator I =
+ MDKindMap.find(Kind);
+ if (I == MDKindMap.end())
+ return Error("Invalid metadata kind ID");
Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
- Inst->setMetadata(Kind, cast<MDNode>(Node));
+ Inst->setMetadata(I->second, cast<MDNode>(Node));
}
break;
}
@@ -1638,6 +1651,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
+ unsigned ModuleMDValueListSize = MDValueList.size();
// Add all the function arguments to the value table.
for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
@@ -1722,7 +1736,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = 0;
continue;
- case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
+ // FIXME: Remove this in LLVM 3.0.
+ case bitc::FUNC_CODE_DEBUG_LOC:
+ LLVM2_7MetadataDetected = true;
+ case bitc::FUNC_CODE_DEBUG_LOC2: { // DEBUG_LOC: [line, col, scope, ia]
I = 0; // Get the last instruction emitted.
if (CurBB && !CurBB->empty())
I = &CurBB->back();
@@ -1988,6 +2005,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
} while(OpNum != Record.size());
const Type *ReturnType = F->getReturnType();
+ // Handle multiple return values. FIXME: Remove in LLVM 3.0.
if (Vs.size() > 1 ||
(ReturnType->isStructTy() &&
(Vs.empty() || Vs[0]->getType() != ReturnType))) {
@@ -2183,7 +2201,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
// For backward compatibility, tolerate a lack of an opty, and use i32.
- // LLVM 3.0: Remove this.
+ // Remove this in LLVM 3.0.
if (Record.size() < 3 || Record.size() > 4)
return Error("Invalid ALLOCA record");
unsigned OpNum = 0;
@@ -2236,7 +2254,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_CALL: {
+ // FIXME: Remove this in LLVM 3.0.
+ case bitc::FUNC_CODE_INST_CALL:
+ LLVM2_7MetadataDetected = true;
+ case bitc::FUNC_CODE_INST_CALL2: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
return Error("Invalid CALL record");
@@ -2324,7 +2345,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (A->getParent() == 0) {
// We found at least one unresolved value. Nuke them all to avoid leaks.
for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){
- if ((A = dyn_cast<Argument>(ValueList.back())) && A->getParent() == 0) {
+ if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) {
A->replaceAllUsesWith(UndefValue::get(A->getType()));
delete A;
}
@@ -2333,6 +2354,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
}
+ // FIXME: Check for unresolved forward-declared metadata references
+ // and clean up leaks.
+
// See if anything took the address of blocks in this function. If so,
// resolve them now.
DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =
@@ -2352,8 +2376,21 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
BlockAddrFwdRefs.erase(BAFRI);
}
+ // FIXME: Remove this in LLVM 3.0.
+ unsigned NewMDValueListSize = MDValueList.size();
+
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
+ MDValueList.shrinkTo(ModuleMDValueListSize);
+
+ // Backwards compatibility hack: Function-local metadata numbers
+ // were previously not reset between functions. This is now fixed,
+ // however we still need to understand the old numbering in order
+ // to be able to read old bitcode files.
+ // FIXME: Remove this in LLVM 3.0.
+ if (LLVM2_7MetadataDetected)
+ MDValueList.resize(NewMDValueListSize);
+
std::vector<BasicBlock*>().swap(FunctionBBs);
return false;
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 55c71f7c886f..053121bdad6e 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -156,6 +156,9 @@ class BitcodeReader : public GVMaterializer {
// stored here with their replacement function.
typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
// After the module header has been read, the FunctionsWithBodies list is
// reversed. This keeps track of whether we've done this yet.
@@ -170,11 +173,18 @@ class BitcodeReader : public GVMaterializer {
/// are resolved lazily when functions are loaded.
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+
+ /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or
+ /// earlier was detected, in which case we behave slightly differently,
+ /// for compatibility.
+ /// FIXME: Remove in LLVM 3.0.
+ bool LLVM2_7MetadataDetected;
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
- ErrorString(0), ValueList(C), MDValueList(C) {
+ ErrorString(0), ValueList(C), MDValueList(C),
+ LLVM2_7MetadataDetected(false) {
HasReversedFunctionsWithBodies = false;
}
~BitcodeReader() {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index fa1b2c4bee2b..7b6fc6cd928d 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -181,14 +181,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
- // Abbrev for TYPE_CODE_UNION.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
- unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv);
-
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -258,17 +250,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
AbbrevToUse = StructAbbrev;
break;
}
- case Type::UnionTyID: {
- const UnionType *UT = cast<UnionType>(T);
- // UNION: [eltty x N]
- Code = bitc::TYPE_CODE_UNION;
- // Output all of the element types.
- for (UnionType::element_iterator I = UT->element_begin(),
- E = UT->element_end(); I != E; ++I)
- TypeVals.push_back(VE.getTypeID(*I));
- AbbrevToUse = UnionAbbrev;
- break;
- }
case Type::ArrayTyID: {
const ArrayType *AT = cast<ArrayType>(T);
// ARRAY: [numelts, eltty]
@@ -299,21 +280,22 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
static unsigned getEncodedLinkage(const GlobalValue *GV) {
switch (GV->getLinkage()) {
default: llvm_unreachable("Invalid linkage!");
- case GlobalValue::ExternalLinkage: return 0;
- case GlobalValue::WeakAnyLinkage: return 1;
- case GlobalValue::AppendingLinkage: return 2;
- case GlobalValue::InternalLinkage: return 3;
- case GlobalValue::LinkOnceAnyLinkage: return 4;
- case GlobalValue::DLLImportLinkage: return 5;
- case GlobalValue::DLLExportLinkage: return 6;
- case GlobalValue::ExternalWeakLinkage: return 7;
- case GlobalValue::CommonLinkage: return 8;
- case GlobalValue::PrivateLinkage: return 9;
- case GlobalValue::WeakODRLinkage: return 10;
- case GlobalValue::LinkOnceODRLinkage: return 11;
- case GlobalValue::AvailableExternallyLinkage: return 12;
- case GlobalValue::LinkerPrivateLinkage: return 13;
- case GlobalValue::LinkerPrivateWeakLinkage: return 14;
+ case GlobalValue::ExternalLinkage: return 0;
+ case GlobalValue::WeakAnyLinkage: return 1;
+ case GlobalValue::AppendingLinkage: return 2;
+ case GlobalValue::InternalLinkage: return 3;
+ case GlobalValue::LinkOnceAnyLinkage: return 4;
+ case GlobalValue::DLLImportLinkage: return 5;
+ case GlobalValue::DLLExportLinkage: return 6;
+ case GlobalValue::ExternalWeakLinkage: return 7;
+ case GlobalValue::CommonLinkage: return 8;
+ case GlobalValue::PrivateLinkage: return 9;
+ case GlobalValue::WeakODRLinkage: return 10;
+ case GlobalValue::LinkOnceODRLinkage: return 11;
+ case GlobalValue::AvailableExternallyLinkage: return 12;
+ case GlobalValue::LinkerPrivateLinkage: return 13;
+ case GlobalValue::LinkerPrivateWeakLinkage: return 14;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
}
}
@@ -503,13 +485,14 @@ static void WriteMDNode(const MDNode *N,
Record.push_back(0);
}
}
- unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
- bitc::METADATA_NODE;
+ unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 :
+ bitc::METADATA_NODE2;
Stream.EmitRecord(MDCode, Record, 0);
Record.clear();
}
-static void WriteModuleMetadata(const ValueEnumerator &VE,
+static void WriteModuleMetadata(const Module *M,
+ const ValueEnumerator &VE,
BitstreamWriter &Stream) {
const ValueEnumerator::ValueList &Vals = VE.getMDValues();
bool StartedMetadataBlock = false;
@@ -544,29 +527,30 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
// Emit the finished record.
Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
Record.clear();
- } else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(Vals[i].first)) {
- if (!StartedMetadataBlock) {
- Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
- StartedMetadataBlock = true;
- }
-
- // Write name.
- StringRef Str = NMD->getName();
- for (unsigned i = 0, e = Str.size(); i != e; ++i)
- Record.push_back(Str[i]);
- Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
- Record.clear();
+ }
+ }
- // Write named metadata operands.
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- if (NMD->getOperand(i))
- Record.push_back(VE.getValueID(NMD->getOperand(i)));
- else
- Record.push_back(~0U);
- }
- Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
- Record.clear();
+ // Write named metadata.
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
}
+
+ // Write name.
+ StringRef Str = NMD->getName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ Record.push_back(Str[i]);
+ Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
+ Record.clear();
+
+ // Write named metadata operands.
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(NMD->getOperand(i)));
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0);
+ Record.clear();
}
if (StartedMetadataBlock)
@@ -601,7 +585,7 @@ static void WriteMetadataAttachment(const Function &F,
SmallVector<uint64_t, 64> Record;
// Write metadata attachments
- // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
+ // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]]
SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -619,7 +603,7 @@ static void WriteMetadataAttachment(const Function &F,
Record.push_back(MDs[i].first);
Record.push_back(VE.getValueID(MDs[i].second));
}
- Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0);
Record.clear();
}
@@ -634,12 +618,11 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
SmallVector<StringRef, 4> Names;
M->getMDKindNames(Names);
- assert(Names[0] == "" && "MDKind #0 is invalid");
- if (Names.size() == 1) return;
+ if (Names.empty()) return;
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
- for (unsigned MDKindID = 1, e = Names.size(); MDKindID != e; ++MDKindID) {
+ for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
Record.push_back(MDKindID);
StringRef KName = Names[MDKindID];
Record.append(KName.begin(), KName.end());
@@ -734,8 +717,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Code = bitc::CST_CODE_UNDEF;
} else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
if (IV->getBitWidth() <= 64) {
- int64_t V = IV->getSExtValue();
- if (V >= 0)
+ uint64_t V = IV->getSExtValue();
+ if ((int64_t)V >= 0)
Record.push_back(V << 1);
else
Record.push_back((-V << 1) | 1);
@@ -809,20 +792,6 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
Record.push_back(VE.getValueID(C->getOperand(i)));
AbbrevToUse = AggregateAbbrev;
- } else if (isa<ConstantUnion>(C)) {
- Code = bitc::CST_CODE_AGGREGATE;
-
- // Unions only have one entry but we must send type along with it.
- const Type *EntryKind = C->getOperand(0)->getType();
-
- const UnionType *UnTy = cast<UnionType>(C->getType());
- int UnionIndex = UnTy->getElementTypeIndex(EntryKind);
- assert(UnionIndex != -1 && "Constant union contains invalid entry");
-
- Record.push_back(UnionIndex);
- Record.push_back(VE.getValueID(C->getOperand(0)));
-
- AbbrevToUse = AggregateAbbrev;
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
switch (CE->getOpcode()) {
default:
@@ -902,6 +871,9 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(VE.getValueID(BA->getFunction()));
Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
} else {
+#ifndef NDEBUG
+ C->dump();
+#endif
llvm_unreachable("Unknown constant!");
}
Stream.EmitRecord(Code, Record, AbbrevToUse);
@@ -1139,7 +1111,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- Code = bitc::FUNC_CODE_INST_CALL;
+ Code = bitc::FUNC_CODE_INST_CALL2;
Vals.push_back(VE.getAttributeID(CI.getAttributes()));
Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
@@ -1283,7 +1255,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
Vals.push_back(DL.getCol());
Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
- Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals);
Vals.clear();
LastDL = DL;
@@ -1532,7 +1504,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
WriteModuleConstants(VE, Stream);
// Emit metadata.
- WriteModuleMetadata(VE, Stream);
+ WriteModuleMetadata(M, VE, Stream);
// Emit function bodies.
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 3a0d3ce0be99..91e115cba6cc 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -21,7 +21,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit WriteBitcodePass(raw_ostream &o)
- : ModulePass(&ID), OS(o) {}
+ : ModulePass(ID), OS(o) {}
const char *getPassName() const { return "Bitcode Writer"; }
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 7fa425a7d871..2f02262c36af 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -75,7 +75,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
// Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
- EnumerateMDSymbolTable(M->getMDSymbolTable());
+ EnumerateNamedMetadata(M);
SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
@@ -137,7 +137,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert (I != InstructionMap.end() && "Instruction is not mapped!");
- return I->second;
+ return I->second;
}
void ValueEnumerator::setInstructionID(const Instruction *I) {
@@ -207,35 +207,48 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
EnumerateValue(VI->getValue());
}
-/// EnumerateMDSymbolTable - Insert all of the values in the specified metadata
-/// table.
-void ValueEnumerator::EnumerateMDSymbolTable(const MDSymbolTable &MST) {
- for (MDSymbolTable::const_iterator MI = MST.begin(), ME = MST.end();
- MI != ME; ++MI)
- EnumerateValue(MI->getValue());
+/// EnumerateNamedMetadata - Insert all of the values referenced by
+/// named metadata in the specified module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module *M) {
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ EnumerateNamedMDNode(I);
}
void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
- // Check to see if it's already in!
- unsigned &MDValueID = MDValueMap[MD];
- if (MDValueID) {
- // Increment use count.
- MDValues[MDValueID-1].second++;
- return;
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+ EnumerateMetadata(MD->getOperand(i));
+}
+
+/// EnumerateMDNodeOperands - Enumerate all non-function-local values
+/// and types referenced by the given MDNode.
+void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (isa<MDNode>(V) || isa<MDString>(V))
+ EnumerateMetadata(V);
+ else if (!isa<Instruction>(V) && !isa<Argument>(V))
+ EnumerateValue(V);
+ } else
+ EnumerateType(Type::getVoidTy(N->getContext()));
}
+}
+
+void ValueEnumerator::EnumerateMetadata(const Value *MD) {
+ assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
// Enumerate the type of this value.
EnumerateType(MD->getType());
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
- if (MDNode *E = MD->getOperand(i))
- EnumerateValue(E);
- MDValues.push_back(std::make_pair(MD, 1U));
- MDValueMap[MD] = Values.size();
-}
+ const MDNode *N = dyn_cast<MDNode>(MD);
+
+ // In the module-level pass, skip function-local nodes themselves, but
+ // do walk their operands.
+ if (N && N->isFunctionLocal() && N->getFunction()) {
+ EnumerateMDNodeOperands(N);
+ return;
+ }
-void ValueEnumerator::EnumerateMetadata(const Value *MD) {
- assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
// Check to see if it's already in!
unsigned &MDValueID = MDValueMap[MD];
if (MDValueID) {
@@ -243,37 +256,52 @@ void ValueEnumerator::EnumerateMetadata(const Value *MD) {
MDValues[MDValueID-1].second++;
return;
}
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueID = MDValues.size();
+
+ // Enumerate all non-function-local operands.
+ if (N)
+ EnumerateMDNodeOperands(N);
+}
+
+/// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata
+/// information reachable from the given MDNode.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
+ assert(N->isFunctionLocal() && N->getFunction() &&
+ "EnumerateFunctionLocalMetadata called on non-function-local mdnode!");
// Enumerate the type of this value.
- EnumerateType(MD->getType());
+ EnumerateType(N->getType());
- if (const MDNode *N = dyn_cast<MDNode>(MD)) {
- MDValues.push_back(std::make_pair(MD, 1U));
- MDValueMap[MD] = MDValues.size();
- MDValueID = MDValues.size();
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (Value *V = N->getOperand(i))
- EnumerateValue(V);
- else
- EnumerateType(Type::getVoidTy(MD->getContext()));
- }
- if (N->isFunctionLocal() && N->getFunction())
- FunctionLocalMDs.push_back(N);
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[N];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
return;
}
-
- // Add the value.
- assert(isa<MDString>(MD) && "Unknown metadata kind");
- MDValues.push_back(std::make_pair(MD, 1U));
+ MDValues.push_back(std::make_pair(N, 1U));
MDValueID = MDValues.size();
+
+ // To incoroporate function-local information visit all function-local
+ // MDNodes and all function-local values they reference.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *O = dyn_cast<MDNode>(V)) {
+ if (O->isFunctionLocal() && O->getFunction())
+ EnumerateFunctionLocalMetadata(O);
+ } else if (isa<Instruction>(V) || isa<Argument>(V))
+ EnumerateValue(V);
+ }
+
+ // Also, collect all function-local MDNodes for easy access.
+ FunctionLocalMDs.push_back(N);
}
void ValueEnumerator::EnumerateValue(const Value *V) {
assert(!V->getType()->isVoidTy() && "Can't insert void values!");
- if (isa<MDNode>(V) || isa<MDString>(V))
- return EnumerateMetadata(V);
- else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V))
- return EnumerateNamedMDNode(NMD);
+ assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
+ "EnumerateValue doesn't handle Metadata!");
// Check to see if it's already in!
unsigned &ValueID = ValueMap[V];
@@ -359,7 +387,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
// blockaddress.
if (isa<BasicBlock>(Op)) continue;
- EnumerateOperandType(cast<Constant>(Op));
+ EnumerateOperandType(Op);
}
if (const MDNode *N = dyn_cast<MDNode>(V)) {
@@ -368,7 +396,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
EnumerateOperandType(Elem);
}
} else if (isa<MDString>(V) || isa<MDNode>(V))
- EnumerateValue(V);
+ EnumerateMetadata(V);
}
void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
@@ -386,10 +414,11 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
void ValueEnumerator::incorporateFunction(const Function &F) {
InstructionCount = 0;
NumModuleValues = Values.size();
+ NumModuleMDValues = MDValues.size();
// Adding function arguments to the value table.
- for(Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I)
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I)
EnumerateValue(I);
FirstFuncConstantID = Values.size();
@@ -416,7 +445,6 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
FirstInstID = Values.size();
- FunctionLocalMDs.clear();
SmallVector<MDNode *, 8> FnLocalMDVector;
// Add all of the instructions.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -428,6 +456,15 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
// Enumerate metadata after the instructions they might refer to.
FnLocalMDVector.push_back(MD);
}
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ MDNode *N = MDs[i].second;
+ if (N->isFunctionLocal() && N->getFunction())
+ FnLocalMDVector.push_back(N);
+ }
+
if (!I->getType()->isVoidTy())
EnumerateValue(I);
}
@@ -435,18 +472,22 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
// Add all of the function-local metadata.
for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
- EnumerateOperandType(FnLocalMDVector[i]);
+ EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
}
void ValueEnumerator::purgeFunction() {
/// Remove purged values from the ValueMap.
for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
ValueMap.erase(Values[i].first);
+ for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
+ MDValueMap.erase(MDValues[i].first);
for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
ValueMap.erase(BasicBlocks[i]);
Values.resize(NumModuleValues);
+ MDValues.resize(NumModuleMDValues);
BasicBlocks.clear();
+ FunctionLocalMDs.clear();
}
static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 2b9b15fa5a77..cd1d2371b701 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -72,6 +72,11 @@ private:
/// When a function is incorporated, this is the size of the Values list
/// before incorporation.
unsigned NumModuleValues;
+
+ /// When a function is incorporated, this is the size of the MDValues list
+ /// before incorporation.
+ unsigned NumModuleMDValues;
+
unsigned FirstFuncConstantID;
unsigned FirstInstID;
@@ -132,7 +137,9 @@ public:
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+ void EnumerateMDNodeOperands(const MDNode *N);
void EnumerateMetadata(const Value *MD);
+ void EnumerateFunctionLocalMetadata(const MDNode *N);
void EnumerateNamedMDNode(const NamedMDNode *NMD);
void EnumerateValue(const Value *V);
void EnumerateType(const Type *T);
@@ -141,7 +148,7 @@ private:
void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
- void EnumerateMDSymbolTable(const MDSymbolTable &ST);
+ void EnumerateNamedMetadata(const Module *M);
};
} // End llvm namespace
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index a7189acc3fec..5a634d6ccb01 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -41,8 +41,11 @@ DebugMod("agg-antidep-debugmod",
AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
MachineBasicBlock *BB) :
- NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) {
-
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
const unsigned BBSize = BB->size();
for (unsigned i = 0; i < NumTargetRegs; ++i) {
// Initialize all registers to be in their own group. Initially we
@@ -54,8 +57,7 @@ AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
}
}
-unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
-{
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
unsigned Node = GroupNodeIndices[Reg];
while (GroupNodes[Node] != Node)
Node = GroupNodes[Node];
@@ -145,8 +147,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -226,7 +228,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
DEBUG(MI->dump());
DEBUG(dbgs() << "\tRegs:");
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
// If Reg is current live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
@@ -328,8 +330,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
const char *tag,
const char *header,
const char *footer) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -364,7 +366,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
unsigned Count,
std::set<unsigned>& PassthruRegs) {
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -560,8 +562,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned AntiDepGroupIndex,
RenameOrderType& RenameOrder,
std::map<unsigned, unsigned> &RenameMap) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -652,6 +654,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (R == RB) R = RE;
--R;
const unsigned NewSuperReg = *R;
+ // Don't consider non-allocatable registers
+ if (!AllocatableSet.test(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -733,8 +737,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 91ebb850d19d..9d715ccf79f8 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -59,27 +59,27 @@ namespace llvm {
/// currently representing the group that the register belongs to.
/// Register 0 is always represented by the 0 group, a group
/// composed of registers that are not eligible for anti-aliasing.
- unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> GroupNodeIndices;
/// RegRefs - Map registers to all their references within a live range.
std::multimap<unsigned, RegisterReference> RegRefs;
/// KillIndices - The index of the most recent kill (proceding bottom-up),
/// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> KillIndices;
/// DefIndices - The index of the most recent complete def (proceding bottom
/// up), or ~0u if the register is live.
- unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> DefIndices;
public:
AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
/// GetKillIndices - Return the kill indices.
- unsigned *GetKillIndices() { return KillIndices; }
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
/// GetDefIndices - Return the define indices.
- unsigned *GetDefIndices() { return DefIndices; }
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
/// GetRegRefs - Return the RegRefs map.
std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index f71eee5d01b8..e3dd646c952e 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -109,7 +109,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
- if (GV && GV->getName() == ".llvm.eh.catch.all.value") {
+ if (GV && GV->getName() == "llvm.eh.catch.all.value") {
assert(GV->hasInitializer() &&
"The EH catch-all value must have an initializer");
Value *Init = GV->getInitializer();
@@ -171,7 +171,7 @@ ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
FOC = FPC = ISD::SETFALSE;
break;
}
- if (FiniteOnlyFPMath())
+ if (NoNaNsFPMath)
return FOC;
else
return FPC;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index db1b37ab263f..d358ab20ffc5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -91,7 +91,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
- : MachineFunctionPass(&ID),
+ : MachineFunctionPass(ID),
TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer.getContext()),
OutStreamer(Streamer),
@@ -200,11 +200,17 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
- // .weak_definition _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
} else if (MAI->getLinkOnceDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
@@ -510,12 +516,8 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
}
// Check for spill-induced copies
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg,
- SrcSubIdx, DstSubIdx)) {
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
- CommentOS << " Reload Reuse\n";
- }
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
}
/// EmitImplicitDef - This method emits the specified machine instruction
@@ -603,12 +605,15 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
// Print a label for the basic block.
EmitBasicBlockStart(I);
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
+ LastMI = II;
+
// Print the assembly for the instruction.
if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
!II->isDebugValue()) {
@@ -625,7 +630,7 @@ void AsmPrinter::EmitFunctionBody() {
EmitComments(*II, OutStreamer.GetCommentOS());
switch (II->getOpcode()) {
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
@@ -656,11 +661,18 @@ void AsmPrinter::EmitFunctionBody() {
}
}
}
-
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
- if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) {
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
MCInst Noop;
TM.getInstrInfo()->getNoopForMachoTarget(Noop);
if (Noop.getOpcode()) {
@@ -1206,6 +1218,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
}
}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+ unsigned Size)
+ const {
+
+ // Emit Label+Offset
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+ OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+}
//===----------------------------------------------------------------------===//
@@ -1244,6 +1272,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
@@ -1262,10 +1291,17 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
if (C != CE)
return LowerConstant(C, AP);
-#ifndef NDEBUG
- CE->dump();
-#endif
- llvm_unreachable("FIXME: Don't support this constant expr");
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ return MCConstantExpr::Create(0, Ctx);
case Instruction::GetElementPtr: {
const TargetData &TD = *AP.TM.getTargetData();
// Generate a symbolic expression for the byte address
@@ -1413,21 +1449,6 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
"Layout of constant struct may be incorrect!");
}
-static void EmitGlobalConstantUnion(const ConstantUnion *CU,
- unsigned AddrSpace, AsmPrinter &AP) {
- const TargetData *TD = AP.TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CU->getType());
-
- const Constant *Contents = CU->getOperand(0);
- unsigned FilledSize = TD->getTypeAllocSize(Contents->getType());
-
- // Print the actually filled part
- EmitGlobalConstantImpl(Contents, AddrSpace, AP);
-
- // And pad with enough zeroes
- AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace);
-}
-
static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
// FP Constants are printed as integer constants to avoid losing
@@ -1530,7 +1551,7 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
case 8:
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
- AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
@@ -1553,9 +1574,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return;
}
- if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV))
- return EmitGlobalConstantUnion(CVU, AddrSpace, AP);
-
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return EmitGlobalConstantVector(V, AddrSpace, AP);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b310578584bc..ce4519c541e3 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -36,7 +36,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128()) {
+ if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) {
// FIXME: MCize.
OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value));
return;
@@ -61,7 +61,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128() && PadTo == 0) {
+ if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) {
// FIXME: MCize.
OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
return;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 202d9b67fd15..df0316814c08 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -22,7 +22,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
@@ -72,16 +71,18 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
- AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI);
- OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser));
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+ OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
- Parser.setTargetParser(*TAP.get());
+ Parser->setTargetParser(*TAP.get());
// Don't implicitly switch to the text section before the asm.
- int Res = Parser.Run(/*NoInitialTextSection*/ true,
- /*NoFinalize*/ true);
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 65c1d190216f..c886a5ecc615 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -44,7 +44,7 @@ using namespace llvm;
static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
cl::desc("Print DbgScope information for each machine instruction"));
-static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
cl::Hidden,
cl::desc("Disable debug info printing"));
@@ -116,8 +116,8 @@ public:
/// addGlobalType - Add a new global type to the compile unit.
///
- void addGlobalType(StringRef Name, DIE *Die) {
- GlobalTypes[Name] = Die;
+ void addGlobalType(StringRef Name, DIE *Die) {
+ GlobalTypes[Name] = Die;
}
/// getDIE - Returns the debug information entry map slot for the
@@ -131,8 +131,9 @@ public:
/// getDIEEntry - Returns the debug information entry for the speciefied
/// debug variable.
- DIEEntry *getDIEEntry(const MDNode *N) {
- DenseMap<const MDNode *, DIEEntry *>::iterator I = MDNodeToDIEEntryMap.find(N);
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
if (I == MDNodeToDIEEntryMap.end())
return NULL;
return I->second;
@@ -179,6 +180,73 @@ public:
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ unsigned getTag() const { return Var.getTag(); }
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ return Ty;
+ }
+ return Ty;
+ }
};
//===----------------------------------------------------------------------===//
@@ -194,7 +262,7 @@ class DbgScope {
DbgScope *Parent; // Parent to this scope.
DIDescriptor Desc; // Debug info descriptor for scope.
// Location at which this scope is inlined.
- AssertingVH<const MDNode> InlinedAtLocation;
+ AssertingVH<const MDNode> InlinedAtLocation;
bool AbstractScope; // Abstract Scope
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
@@ -220,19 +288,19 @@ public:
const MDNode *getInlinedAt() const { return InlinedAtLocation; }
const MDNode *getScopeNode() const { return Desc; }
const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
- const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+ const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; }
const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
/// openInsnRange - This scope covers instruction range starting from MI.
void openInsnRange(const MachineInstr *MI) {
- if (!FirstInsn)
+ if (!FirstInsn)
FirstInsn = MI;
-
+
if (Parent)
Parent->openInsnRange(MI);
}
- /// extendInsnRange - Extend the current instruction range covered by
+ /// extendInsnRange - Extend the current instruction range covered by
/// this scope.
void extendInsnRange(const MachineInstr *MI) {
assert (FirstInsn && "MI Range is not open!");
@@ -247,9 +315,9 @@ public:
void closeInsnRange(DbgScope *NewScope = NULL) {
assert (LastInsn && "Last insn missing!");
Ranges.push_back(DbgRange(FirstInsn, LastInsn));
- FirstInsn = NULL;
+ FirstInsn = NULL;
LastInsn = NULL;
- // If Parent dominates NewScope then do not close Parent's instruction
+ // If Parent dominates NewScope then do not close Parent's instruction
// range.
if (Parent && (!NewScope || !Parent->dominates(NewScope)))
Parent->closeInsnRange(NewScope);
@@ -264,7 +332,7 @@ public:
unsigned getDFSIn() const { return DFSIn; }
void setDFSIn(unsigned I) { DFSIn = I; }
bool dominates(const DbgScope *S) {
- if (S == this)
+ if (S == this)
return true;
if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
return true;
@@ -313,14 +381,13 @@ DbgScope::~DbgScope() {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
- AbbreviationsSet(InitAbbreviationsSetSize),
+ AbbreviationsSet(InitAbbreviationsSetSize),
CurrentFnDbgScope(0), PrevLabel(NULL) {
NextStringPoolNumber = 0;
-
+
DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
- DwarfDebugLineSectionSym = CurrentLineSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
{
@@ -377,7 +444,7 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
unsigned Form, uint64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = Integer == 1 ?
+ DIEValue *Value = Integer == 1 ?
DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
Die->addValue(Attribute, Form, Value);
}
@@ -392,7 +459,7 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
}
/// addString - Add a string attribute data and value. DIEString only
-/// keeps string reference.
+/// keeps string reference.
void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
StringRef String) {
DIEValue *Value = new (DIEValueAllocator) DIEString(String);
@@ -434,14 +501,14 @@ void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
+void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
// Verify variable.
- if (!V->Verify())
+ if (!V.Verify())
return;
- unsigned Line = V->getLineNumber();
- unsigned FileID = GetOrCreateSourceID(V->getContext().getDirectory(),
- V->getContext().getFilename());
+ unsigned Line = V.getLineNumber();
+ unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(),
+ V.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -449,14 +516,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
+void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
// Verify global variable.
- if (!G->Verify())
+ if (!G.Verify())
return;
- unsigned Line = G->getLineNumber();
- unsigned FileID = GetOrCreateSourceID(G->getContext().getDirectory(),
- G->getContext().getFilename());
+ unsigned Line = G.getLineNumber();
+ unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(),
+ G.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -464,19 +531,19 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
+void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) {
// Verify subprogram.
- if (!SP->Verify())
+ if (!SP.Verify())
return;
// If the line number is 0, don't add it.
- if (SP->getLineNumber() == 0)
+ if (SP.getLineNumber() == 0)
return;
- unsigned Line = SP->getLineNumber();
- if (!SP->getContext().Verify())
+ unsigned Line = SP.getLineNumber();
+ if (!SP.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(SP->getDirectory(),
- SP->getFilename());
+ unsigned FileID = GetOrCreateSourceID(SP.getDirectory(),
+ SP.getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -484,16 +551,16 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
+void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) {
// Verify type.
- if (!Ty->Verify())
+ if (!Ty.Verify())
return;
- unsigned Line = Ty->getLineNumber();
- if (!Ty->getContext().Verify())
+ unsigned Line = Ty.getLineNumber();
+ if (!Ty.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(Ty->getContext().getDirectory(),
- Ty->getContext().getFilename());
+ unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(),
+ Ty.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -501,14 +568,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
+void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
// Verify namespace.
- if (!NS->Verify())
+ if (!NS.Verify())
return;
- unsigned Line = NS->getLineNumber();
- StringRef FN = NS->getFilename();
- StringRef Dir = NS->getDirectory();
+ unsigned Line = NS.getLineNumber();
+ StringRef FN = NS.getFilename();
+ StringRef Dir = NS.getDirectory();
unsigned FileID = GetOrCreateSourceID(Dir, FN);
assert(FileID && "Invalid file id");
@@ -516,55 +583,21 @@ void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/* Byref variables, in Blocks, are declared by the programmer as
- "SomeType VarName;", but the compiler creates a
- __Block_byref_x_VarName struct, and gives the variable VarName
- either the struct, or a pointer to the struct, as its type. This
- is necessary for various behind-the-scenes things the compiler
- needs to do with by-reference variables in blocks.
-
- However, as far as the original *programmer* is concerned, the
- variable should still have type 'SomeType', as originally declared.
-
- The following function dives into the __Block_byref_x_VarName
- struct to find the original type of the variable. This will be
- passed back to the code generating the type for the Debug
- Information Entry for the variable 'VarName'. 'VarName' will then
- have the original type 'SomeType' in its debug information.
-
- The original type 'SomeType' will be the type of the field named
- 'VarName' inside the __Block_byref_x_VarName struct.
-
- NOTE: In order for this to not completely fail on the debugger
- side, the Debug Information Entry for the variable VarName needs to
- have a DW_AT_location that tells the debugger how to unwind through
- the pointers and __Block_byref_x_VarName struct to find the actual
- value of the variable. The function addBlockByrefType does this. */
-
-/// Find the type the programmer originally declared the variable to be
-/// and return that type.
-///
-DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
-
- DIType subType = Ty;
- unsigned tag = Ty.getTag();
-
- if (tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy = DIDerivedType(Ty);
- subType = DTy.getTypeDerivedFrom();
- }
-
- DICompositeType blockStruct = DICompositeType(subType);
- DIArray Elements = blockStruct.getTypeArray();
-
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- DIDerivedType DT = DIDerivedType(Element);
- if (Name == DT.getName())
- return (DT.getTypeDerivedFrom());
- }
+/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
+/// on provided frame index.
+void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) {
+ MachineLocation Location;
+ unsigned FrameReg;
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ Location.set(FrameReg, Offset);
- return Ty;
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
}
/// addComplexAddress - Start with the address based on the location provided,
@@ -575,8 +608,7 @@ DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
- const DIVariable &VD = DV->getVariable();
- DIType Ty = VD.getType();
+ DIType Ty = DV->getType();
// Decode the original location, and use that as the start of the byref
// variable's location.
@@ -603,12 +635,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
}
- for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) {
- uint64_t Element = VD.getAddrElement(i);
+ for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
if (Element == DIFactory::OpPlus) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
} else if (Element == DIFactory::OpDeref) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
} else llvm_unreachable("unknown DIFactory Opcode");
@@ -681,13 +713,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
- const DIVariable &VD = DV->getVariable();
- DIType Ty = VD.getType();
+ DIType Ty = DV->getType();
DIType TmpTy = Ty;
unsigned Tag = Ty.getTag();
bool isPointer = false;
- StringRef varName = VD.getName();
+ StringRef varName = DV->getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
DIDerivedType DTy = DIDerivedType(Ty);
@@ -835,26 +866,26 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
assert (MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
APFloat FPImm = MO.getFPImm()->getValueAPF();
-
+
// Get the raw data form of the floating point.
const APInt FltVal = FPImm.bitcastToAPInt();
const char *FltPtr = (const char*)FltVal.getRawData();
-
+
int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
bool LittleEndian = Asm->getTargetData().isLittleEndian();
int Incr = (LittleEndian ? 1 : -1);
int Start = (LittleEndian ? 0 : NumBytes - 1);
int Stop = (LittleEndian ? NumBytes : -1);
-
+
// Output the constant to DWARF one byte at a time.
for (; Start != Stop; Start += Incr)
addUInt(Block, 0, dwarf::DW_FORM_data1,
(unsigned char)0xFF & FltPtr[Start]);
-
+
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
if (VS)
addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
- return true;
+ return true;
}
@@ -872,7 +903,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
- else
+ else
getCompileUnit(Context)->addDie(Die);
}
@@ -965,7 +996,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
- addSourceLine(&Buffer, &DTy);
+ addSourceLine(&Buffer, DTy);
}
/// constructTypeDIE - Construct type DIE from DICompositeType.
@@ -1039,7 +1070,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addType(ElemDie, DV.getType());
addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- addSourceLine(ElemDie, &DV);
+ addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType())
ElemDie = createMemberDIE(DIDerivedType(Element));
else
@@ -1057,7 +1088,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
- addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
getOrCreateTypeDIE(DIType(ContainingType)));
else {
DIDescriptor Context = CTy.getContext();
@@ -1073,7 +1104,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
|| Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
{
// Add size if non-zero (derived types might be zero-sized.)
@@ -1089,7 +1120,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add source line info if available.
if (!CTy.isForwardDecl())
- addSourceLine(&Buffer, &CTy);
+ addSourceLine(&Buffer, CTy);
}
}
@@ -1149,7 +1180,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) {
return Enumerator;
}
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
/// printer to not emit usual symbol prefix before the symbol name is used then
/// return linkage name after skipping this special LLVM prefix.
static StringRef getRealLinkageName(StringRef LinkageName) {
@@ -1159,40 +1190,16 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
return LinkageName;
}
-/// createGlobalVariableDIE - Create new DIE using GV.
-DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
- // If the global variable was optmized out then no need to create debug info
- // entry.
- if (!GV.getGlobal()) return NULL;
- if (GV.getDisplayName().empty()) return NULL;
-
- DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
- addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- GV.getDisplayName());
-
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
-
- addType(GVDie, GV.getType());
- if (!GV.isLocalToUnit())
- addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- addSourceLine(GVDie, &GV);
-
- return GVDie;
-}
-
/// createMemberDIE - Create new member DIE.
-DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
+DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
DIE *MemberDie = new DIE(DT.getTag());
StringRef Name = DT.getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
+
addType(MemberDie, DT.getTypeDerivedFrom());
- addSourceLine(MemberDie, &DT);
+ addSourceLine(MemberDie, DT);
DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
@@ -1240,7 +1247,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
VBaseLocationDie);
} else
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
@@ -1261,7 +1268,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
}
/// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
CompileUnit *SPCU = getCompileUnit(SP);
DIE *SPDie = SPCU->getDIE(SP);
if (SPDie)
@@ -1277,7 +1284,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
getRealLinkageName(LinkageName));
- addSourceLine(SPDie, &SP);
+ addSourceLine(SPDie, SP);
// Add prototyped tag, if C or ObjC.
unsigned Lang = SP.getCompileUnit().getLanguage();
@@ -1302,7 +1309,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
- ContainingTypeMap.insert(std::make_pair(SPDie,
+ ContainingTypeMap.insert(std::make_pair(SPDie,
SP.getContainingType()));
}
@@ -1331,10 +1338,14 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
if (!SP.isLocalToUnit())
addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
+
if (SP.isOptimized())
addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+ if (unsigned isa = Asm->getISAEncoding()) {
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
// DW_TAG_inlined_subroutine may refer to this DIE.
SPCU->insertDIE(SP, SPDie);
@@ -1394,18 +1405,18 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
-
+
// There is not any need to generate specification DIE for a function
// defined at compile unit level. If a function is defined inside another
// function then gdb prefers the definition at top level and but does not
- // expect specification DIE in parent function. So avoid creating
+ // expect specification DIE in parent function. So avoid creating
// specification DIE for a function defined inside a function.
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
- !SP.getContext().isFile() &&
+ !SP.getContext().isFile() &&
!isSubprogramContext(SP.getContext())) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
- // Add arguments.
+
+ // Add arguments.
DICompositeType SPTy = SP.getType();
DIArray Args = SPTy.getTypeArray();
unsigned SPTag = SPTy.getTag();
@@ -1420,11 +1431,11 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
}
DIE *SPDeclDie = SPDie;
SPDie = new DIE(dwarf::DW_TAG_subprogram);
- addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
SPDeclDie);
SPCU->addDie(SPDie);
}
-
+
// Pick up abstract subprogram DIE.
if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
SPDie = new DIE(dwarf::DW_TAG_subprogram);
@@ -1459,7 +1470,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
if (Ranges.size() > 1) {
// .debug_range section has not been laid out yet. Emit offset in
- // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
@@ -1480,7 +1491,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
-
+
addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
@@ -1493,7 +1504,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
- assert (Ranges.empty() == false
+ assert (Ranges.empty() == false
&& "DbgScope does not have instruction markers!");
// FIXME : .debug_inlined section specification does not clearly state how
@@ -1551,16 +1562,14 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
- // Get the descriptor.
- const DIVariable &VD = DV->getVariable();
- StringRef Name = VD.getName();
+ StringRef Name = DV->getName();
if (Name.empty())
return NULL;
// Translate tag to proper Dwarf tag. The result variable is dropped for
// now.
unsigned Tag;
- switch (VD.getTag()) {
+ switch (DV->getTag()) {
case dwarf::DW_TAG_return_variable:
return NULL;
case dwarf::DW_TAG_arg_variable:
@@ -1586,18 +1595,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
dwarf::DW_FORM_ref4, AbsDIE);
else {
addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- addSourceLine(VariableDie, &VD);
+ addSourceLine(VariableDie, DV->getVariable());
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
- // addresses instead.
- if (VD.isBlockByrefVariable())
- addType(VariableDie, getBlockByrefType(VD.getType(), Name));
- else
- addType(VariableDie, VD.getType());
+ addType(VariableDie, DV->getType());
}
- if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+ if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
if (Scope->isAbstractScope()) {
@@ -1623,15 +1627,22 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
const MachineInstr *DVInsn = DVI->second;
const MCSymbol *DVLabel = findVariableLabel(DV);
bool updated = false;
- // FIXME : Handle getNumOperands != 3
+ // FIXME : Handle getNumOperands != 3
if (DVInsn->getNumOperands() == 3) {
- if (DVInsn->getOperand(0).isReg())
- updated =
- addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
+ updated = true;
+ } else
+ updated = addRegisterAddress(VariableDie, DVLabel, RegOp);
+ }
else if (DVInsn->getOperand(0).isImm())
updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isFPImm())
- updated =
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
} else {
MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
@@ -1651,24 +1662,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
}
DV->setDIE(VariableDie);
return VariableDie;
- }
+ }
// .. else use frame index, if available.
- MachineLocation Location;
- unsigned FrameReg;
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
int FI = 0;
- if (findVariableFrameIndex(DV, &FI)) {
- int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- Location.set(FrameReg, Offset);
-
- if (VD.hasComplexAddress())
- addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
- addAddress(VariableDie, dwarf::DW_AT_location, Location);
- }
+ if (findVariableFrameIndex(DV, &FI))
+ addVariableAddress(DV, VariableDie, FI);
+
DV->setDIE(VariableDie);
return VariableDie;
@@ -1677,7 +1677,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
void DwarfDebug::addPubTypes(DISubprogram SP) {
DICompositeType SPTy = SP.getType();
unsigned SPTag = SPTy.getTag();
- if (SPTag != dwarf::DW_TAG_subroutine_type)
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
return;
DIArray Args = SPTy.getTypeArray();
@@ -1699,7 +1699,7 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
-
+
DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
@@ -1718,9 +1718,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
else
ScopeDIE = constructLexicalScopeDIE(Scope);
if (!ScopeDIE) return NULL;
-
+
// Add variables to scope.
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
if (VariableDIE)
@@ -1736,9 +1736,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
ScopeDIE->addChild(NestedDIE);
}
- if (DS.isSubprogram())
+ if (DS.isSubprogram())
addPubTypes(DISubprogram(DS));
-
+
return ScopeDIE;
}
@@ -1748,6 +1748,8 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
/// maps as well.
unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
unsigned DId;
+ assert (DirName.empty() == false && "Invalid directory name!");
+
StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
if (DI != DirectoryIdMap.end()) {
DId = DI->getValue();
@@ -1789,12 +1791,12 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
TheCU->insertDIE(NS, NDie);
if (!NS.getName().empty())
addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
- addSourceLine(NDie, &NS);
+ addSourceLine(NDie, NS);
addToContextOwner(NDie, NS.getContext());
return NDie;
}
-/// constructCompileUnit - Create new CompileUnit for the given
+/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
void DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
@@ -1812,9 +1814,12 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
// simplifies debug range entries.
addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is a offset of line number information for this
- // compile unit in debug_line section. This offset is calculated
- // during endMoudle().
- addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ // compile unit in debug_line section.
+ if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
+ addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("section_line"));
+ else
+ addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
if (!Dir.empty())
addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1865,64 +1870,98 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
return I->second;
}
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+ DIDerivedType DTy(Ty);
+ if (DTy.Verify())
+ return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
/// constructGlobalVariableDIE - Construct global variable DIE.
void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
- DIGlobalVariable DI_GV(N);
+ DIGlobalVariable GV(N);
// If debug information is malformed then ignore it.
- if (DI_GV.Verify() == false)
+ if (GV.Verify() == false)
return;
// Check for pre-existence.
CompileUnit *TheCU = getCompileUnit(N);
- if (TheCU->getDIE(DI_GV))
+ if (TheCU->getDIE(GV))
return;
- DIE *VariableDie = createGlobalVariableDIE(DI_GV);
- if (!VariableDie)
- return;
-
- // Add to map.
- TheCU->insertDIE(N, VariableDie);
+ DIType GTy = GV.getType();
+ DIE *VariableDIE = new DIE(GV.getTag());
- // Add to context owner.
- DIDescriptor GVContext = DI_GV.getContext();
- // Do not create specification DIE if context is either compile unit
- // or a subprogram.
- if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() &&
- !isSubprogramContext(GVContext)) {
- // Create specification DIE.
- DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
- addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
- dwarf::DW_FORM_ref4, VariableDie);
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(DI_GV.getGlobal()));
- addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
- addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- TheCU->addDie(VariableSpecDIE);
- } else {
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(DI_GV.getGlobal()));
- addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
- }
- addToContextOwner(VariableDie, GVContext);
-
- // Expose as global. FIXME - need to check external flag.
- TheCU->addGlobal(DI_GV.getName(), VariableDie);
+ bool isGlobalVariable = GV.getGlobal() != NULL;
- DIType GTy = DI_GV.getType();
+ // Add name.
+ addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty() && isGlobalVariable)
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+ // Add type.
+ addType(VariableDIE, GTy);
if (GTy.isCompositeType() && !GTy.getName().empty()
&& !GTy.isForwardDecl()) {
DIEEntry *Entry = TheCU->getDIEEntry(GTy);
assert(Entry && "Missing global type!");
TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
}
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ // Expose as global.
+ TheCU->addGlobal(GV.getName(), VariableDIE);
+ }
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to map.
+ TheCU->insertDIE(N, VariableDIE);
+ // Add to context owner.
+ DIDescriptor GVContext = GV.getContext();
+ addToContextOwner(VariableDIE, GVContext);
+ // Add location.
+ if (isGlobalVariable) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ TheCU->addDie(VariableSpecDIE);
+ } else {
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ } else if (Constant *C = GV.getConstant()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (isUnsignedDIType(GTy))
+ addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ CI->getZExtValue());
+ else
+ addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+ CI->getSExtValue());
+ }
+ }
return;
}
@@ -1965,7 +2004,7 @@ void DwarfDebug::beginModule(Module *M) {
DbgFinder.processModule(*M);
bool HasDebugInfo = false;
-
+
// Scan all the compile-units to see if there are any marked as the main unit.
// if not, we do not generate debug info.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
@@ -1975,15 +2014,15 @@ void DwarfDebug::beginModule(Module *M) {
break;
}
}
-
+
if (!HasDebugInfo) return;
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
-
+
// Emit initial sections.
EmitSectionLabels();
-
+
// Create all the compile unit DIEs.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
E = DbgFinder.compile_unit_end(); I != E; ++I)
@@ -1999,6 +2038,11 @@ void DwarfDebug::beginModule(Module *M) {
E = DbgFinder.global_variable_end(); I != E; ++I)
constructGlobalVariableDIE(*I);
+ //getOrCreateTypeDIE
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
@@ -2025,6 +2069,7 @@ void DwarfDebug::beginModule(Module *M) {
void DwarfDebug::endModule() {
if (!FirstCU) return;
const Module *M = MMI->getModule();
+ DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
@@ -2032,25 +2077,27 @@ void DwarfDebug::endModule() {
if (!SP.Verify()) continue;
// Collect info for variables that were optimized out.
+ if (!SP.isDefinition()) continue;
StringRef FName = SP.getLinkageName();
if (FName.empty())
FName = SP.getName();
- NamedMDNode *NMD =
+ NamedMDNode *NMD =
M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
if (!NMD) continue;
unsigned E = NMD->getNumOperands();
if (!E) continue;
DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+ DeadFnScopeMap[SP] = Scope;
for (unsigned I = 0; I != E; ++I) {
DIVariable DV(NMD->getOperand(I));
if (!DV.Verify()) continue;
Scope->addVariable(new DbgVariable(DV));
}
-
+
// Construct subprogram DIE and add variables DIEs.
constructSubprogramDIE(SP);
DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
if (VariableDIE)
@@ -2099,15 +2146,15 @@ void DwarfDebug::endModule() {
// Compute DIE offsets and sizes.
computeSizeAndOffsets();
- // Emit source line correspondence into a debug line section.
- emitDebugLines();
-
// Emit all the DIEs into a debug info section
emitDebugInfo();
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
+ // Emit source line correspondence into a debug line section.
+ emitDebugLines();
+
// Emit info into a debug pubnames section.
emitDebugPubNames();
@@ -2131,7 +2178,9 @@ void DwarfDebug::endModule() {
// Emit info into a debug str section.
emitDebugStr();
-
+
+ // clean up.
+ DeleteContainerSeconds(DeadFnScopeMap);
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I)
delete I->second;
@@ -2139,7 +2188,7 @@ void DwarfDebug::endModule() {
}
/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
DebugLoc ScopeLoc) {
DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
@@ -2159,7 +2208,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
/// collectVariableInfoFromMMITable - Collect variable information from
/// side table maintained by MMI.
-void
+void
DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
@@ -2177,7 +2226,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
Scope = ConcreteScopes.lookup(IA);
if (Scope == 0)
Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
-
+
// If variable scope is not found then skip this variable.
if (Scope == 0)
continue;
@@ -2193,7 +2242,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
}
}
-/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
+/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in undefined reg.
static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
@@ -2202,7 +2251,7 @@ static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
return false;
}
-/// isDbgValueInDefinedReg - Return true if debug value, encoded by
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in a defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
@@ -2212,10 +2261,10 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
}
/// collectVariableInfo - Populate DbgScope entries with variables' info.
-void
+void
DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
-
+
/// collection info from MMI table.
collectVariableInfoFromMMITable(MF, Processed);
@@ -2244,11 +2293,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
continue;
const MachineInstr *PrevMI = MInsn;
- for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
+ for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
ME = DbgValues.end(); MI != ME; ++MI) {
- const MDNode *Var =
+ const MDNode *Var =
(*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
- if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+ if (Var == DV && isDbgValueInDefinedReg(*MI) &&
!PrevMI->isIdenticalTo(*MI))
MultipleValues.push_back(*MI);
PrevMI = *MI;
@@ -2269,7 +2318,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
DbgVariable *RegVar = new DbgVariable(DV);
Scope->addVariable(RegVar);
if (!CurFnArg)
- DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
+ DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
DbgVariableToDbgInstMap[AbsVar] = MInsn;
VarToAbstractVarMap[RegVar] = AbsVar;
@@ -2286,26 +2335,39 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
const MachineInstr *Begin = NULL;
const MachineInstr *End = NULL;
- for (SmallVector<const MachineInstr *, 4>::iterator
- MVI = MultipleValues.begin(), MVE = MultipleValues.end();
+ for (SmallVector<const MachineInstr *, 4>::iterator
+ MVI = MultipleValues.begin(), MVE = MultipleValues.end();
MVI != MVE; ++MVI) {
if (!Begin) {
Begin = *MVI;
continue;
- }
+ }
End = *MVI;
MachineLocation MLoc;
- MLoc.set(Begin->getOperand(0).getReg(), 0);
+ if (Begin->getNumOperands() == 3) {
+ if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm())
+ MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
+ } else
+ MLoc = Asm->getDebugValueLocation(Begin);
+
const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
const MCSymbol *SLabel = getLabelBeforeInsn(End);
- DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+ if (MLoc.getReg())
+ DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+
Begin = End;
if (MVI + 1 == MVE) {
// If End is the last instruction then its value is valid
// until the end of the funtion.
- MLoc.set(End->getOperand(0).getReg(), 0);
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(SLabel, FunctionEndSym, MLoc));
+ MachineLocation EMLoc;
+ if (End->getNumOperands() == 3) {
+ if (End->getOperand(0).isReg() && Begin->getOperand(1).isImm())
+ EMLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
+ } else
+ EMLoc = Asm->getDebugValueLocation(End);
+ if (EMLoc.getReg())
+ DotDebugLocEntries.
+ push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc));
}
}
DotDebugLocEntries.push_back(DotDebugLocEntry());
@@ -2314,11 +2376,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
// Collect info for variables that were optimized out.
const Function *F = MF->getFunction();
const Module *M = F->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ if (NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
getRealLinkageName(F->getName())))) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !Processed.insert(DV))
continue;
DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
@@ -2364,7 +2426,7 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
return;
}
- // If location is unknown then use temp label for this DBG_VALUE
+ // If location is unknown then use temp label for this DBG_VALUE
// instruction.
if (MI->isDebugValue()) {
PrevLabel = MMI->getContext().CreateTempSymbol();
@@ -2393,7 +2455,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
}
/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
const MDNode *InlinedAt) {
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
@@ -2402,7 +2464,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
DbgScopeMap.insert(std::make_pair(Scope, WScope));
if (DIDescriptor(Scope).isLexicalBlock()) {
- DbgScope *Parent =
+ DbgScope *Parent =
getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
WScope->setParent(Parent);
Parent->addScope(WScope);
@@ -2419,7 +2481,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
CurrentFnDbgScope = WScope;
}
-
+
return WScope;
}
@@ -2448,14 +2510,14 @@ static bool hasValidLocation(LLVMContext &Ctx,
const MDNode *&Scope, const MDNode *&InlinedAt) {
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) return false;
-
+
const MDNode *S = DL.getScope(Ctx);
-
+
// There is no need to create another DIE for compile unit. For all
// other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
if (DIScope(S).isCompileUnit()) return false;
-
+
Scope = S;
InlinedAt = DL.getInlinedAt(Ctx);
return true;
@@ -2490,7 +2552,7 @@ static void calculateDominanceGraph(DbgScope *Scope) {
}
/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
-static
+static
void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
{
@@ -2507,9 +2569,9 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
// Check if instruction has valid location information.
if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
dbgs() << " [ ";
- if (InlinedAt)
+ if (InlinedAt)
dbgs() << "*";
- DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+ DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
MI2ScopeMap.find(MInsn);
if (DI != MI2ScopeMap.end()) {
DbgScope *S = DI->second;
@@ -2517,7 +2579,7 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
PrevDFSIn = S->getDFSIn();
} else
dbgs() << PrevDFSIn;
- } else
+ } else
dbgs() << " [ x" << PrevDFSIn;
dbgs() << " ]";
MInsn->dump();
@@ -2555,26 +2617,26 @@ bool DwarfDebug::extractScopeInformation() {
PrevMI = MInsn;
continue;
}
-
+
// If scope has not changed then skip this instruction.
if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
PrevMI = MInsn;
continue;
}
- if (RangeBeginMI) {
- // If we have alread seen a beginning of a instruction range and
+ if (RangeBeginMI) {
+ // If we have alread seen a beginning of a instruction range and
// current instruction scope does not match scope of first instruction
// in this range then create a new instruction range.
DbgRange R(RangeBeginMI, PrevMI);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
PrevInlinedAt);
MIRanges.push_back(R);
- }
+ }
// This is a beginning of a new instruction range.
RangeBeginMI = MInsn;
-
+
// Reset previous markers.
PrevMI = MInsn;
PrevScope = Scope;
@@ -2588,7 +2650,7 @@ bool DwarfDebug::extractScopeInformation() {
MIRanges.push_back(R);
MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
}
-
+
if (!CurrentFnDbgScope)
return false;
@@ -2618,7 +2680,7 @@ bool DwarfDebug::extractScopeInformation() {
return !DbgScopeMap.empty();
}
-/// identifyScopeMarkers() -
+/// identifyScopeMarkers() -
/// Each DbgScope has first instruction and last instruction to mark beginning
/// and end of a scope respectively. Create an inverse map that list scopes
/// starts (and ends) with an instruction. One instruction may start (or end)
@@ -2628,23 +2690,23 @@ void DwarfDebug::identifyScopeMarkers() {
WorkList.push_back(CurrentFnDbgScope);
while (!WorkList.empty()) {
DbgScope *S = WorkList.pop_back_val();
-
+
const SmallVector<DbgScope *, 4> &Children = S->getScopes();
- if (!Children.empty())
+ if (!Children.empty())
for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
SE = Children.end(); SI != SE; ++SI)
WorkList.push_back(*SI);
if (S->isAbstractScope())
continue;
-
+
const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
if (Ranges.empty())
continue;
for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
- assert(RI->first && "DbgRange does not have first instruction!");
- assert(RI->second && "DbgRange does not have second instruction!");
+ assert(RI->first && "DbgRange does not have first instruction!");
+ assert(RI->second && "DbgRange does not have second instruction!");
InsnsEndScopeSet.insert(RI->second);
}
}
@@ -2680,20 +2742,23 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// function.
DebugLoc FDL = FindFirstDebugLoc(MF);
if (FDL.isUnknown()) return;
-
+
const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
-
+ const MDNode *TheScope = 0;
+
DISubprogram SP = getDISubprogram(Scope);
unsigned Line, Col;
if (SP.Verify()) {
Line = SP.getLineNumber();
Col = 0;
+ TheScope = SP;
} else {
Line = FDL.getLine();
Col = FDL.getCol();
+ TheScope = Scope;
}
-
- recordSourceLine(Line, Col, Scope);
+
+ recordSourceLine(Line, Col, TheScope);
/// ProcessedArgs - Collection of arguments already processed.
SmallPtrSet<const MDNode *, 8> ProcessedArgs;
@@ -2710,7 +2775,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
if (!DV.Verify()) continue;
// If DBG_VALUE is for a local variable then it needs a label.
- if (DV.getTag() != dwarf::DW_TAG_arg_variable
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable
&& isDbgValueInUndefinedReg(MI) == false)
InsnNeedsLabel.insert(MI);
// DBG_VALUE for inlined functions argument needs a label.
@@ -2718,10 +2783,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
describes(MF->getFunction()))
InsnNeedsLabel.insert(MI);
// DBG_VALUE indicating argument location change needs a label.
- else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV))
+ else if (isDbgValueInUndefinedReg(MI) == false
+ && !ProcessedArgs.insert(DV))
InsnNeedsLabel.insert(MI);
} else {
- // If location is unknown then instruction needs a location only if
+ // If location is unknown then instruction needs a location only if
// UnknownLocations flag is set.
if (DL.isUnknown()) {
if (UnknownLocations && !PrevLoc.isUnknown())
@@ -2730,7 +2796,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Otherwise, instruction needs a location only if it is new location.
InsnNeedsLabel.insert(MI);
}
-
+
if (!DL.isUnknown() || UnknownLocations)
PrevLoc = DL;
}
@@ -2750,7 +2816,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
+
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
@@ -2764,7 +2830,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
SectionLineInfos.insert(SectionLineInfos.end(),
Lines.begin(), Lines.end());
}
-
+
// Construct abstract scopes.
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
AE = AbstractScopesList.end(); AI != AE; ++AI) {
@@ -2775,11 +2841,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
if (FName.empty())
FName = SP.getName();
const Module *M = MF->getFunction()->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ if (NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
getRealLinkageName(FName)))) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !ProcessedVars.insert(DV))
continue;
DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
@@ -2793,9 +2859,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
}
DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
-
+
if (!DisableFramePointerElim(*MF))
- addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
+ addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
dwarf::DW_FORM_flag, 1);
@@ -2849,22 +2915,22 @@ const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
else return I->second;
}
-/// findDbgScope - Find DbgScope for the debug loc attached with an
+/// findDbgScope - Find DbgScope for the debug loc attached with an
/// instruction.
DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
DbgScope *Scope = NULL;
- LLVMContext &Ctx =
+ LLVMContext &Ctx =
MInsn->getParent()->getParent()->getFunction()->getContext();
DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown())
+ if (DL.isUnknown())
return Scope;
if (const MDNode *IA = DL.getInlinedAt(Ctx))
Scope = ConcreteScopes.lookup(IA);
if (Scope == 0)
Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
+
return Scope;
}
@@ -2872,7 +2938,7 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
const MDNode *S) {
StringRef Dir;
StringRef Fn;
@@ -2899,16 +2965,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
Src = GetOrCreateSourceID(Dir, Fn);
}
-#if 0
- if (!Lines.empty()) {
- SrcLineInfo lastSrcLineInfo = Lines.back();
- // Emitting sequential line records with the same line number (but
- // different addresses) seems to confuse GDB. Avoid this.
- if (lastSrcLineInfo.getLine() == Line)
- return NULL;
- }
-#endif
-
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
@@ -2991,7 +3047,7 @@ static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
const char *SymbolStem = 0) {
Asm->OutStreamer.SwitchSection(Section);
if (!SymbolStem) return 0;
-
+
MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
Asm->OutStreamer.EmitLabel(TmpSym);
return TmpSym;
@@ -3008,21 +3064,20 @@ void DwarfDebug::EmitSectionLabels() {
EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame");
}
- DwarfInfoSectionSym =
+ DwarfInfoSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
- DwarfAbbrevSectionSym =
+ DwarfAbbrevSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
-
+
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
EmitSectionSym(Asm, MacroInfo);
- DwarfDebugLineSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
EmitSectionSym(Asm, TLOF.getDwarfLocSection());
EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
- DwarfStrSectionSym =
+ DwarfStrSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
@@ -3060,7 +3115,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
if (Asm->isVerbose())
Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
-
+
switch (Attr) {
case dwarf::DW_AT_sibling:
Asm->EmitInt32(Die->getSiblingOffset());
@@ -3075,15 +3130,17 @@ void DwarfDebug::emitDIE(DIE *Die) {
case dwarf::DW_AT_ranges: {
// DW_AT_range Value encodes offset in debug_range section.
DIEInteger *V = cast<DIEInteger>(Values[i]);
- Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
- V->getValue(),
- DwarfDebugRangeSectionSym,
- 4);
- break;
- }
- case dwarf::DW_AT_stmt_list: {
- Asm->EmitLabelDifference(CurrentLineSectionSym,
- DwarfDebugLineSectionSym, 4);
+
+ if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ 4);
+ } else {
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ }
break;
}
case dwarf::DW_AT_location: {
@@ -3124,18 +3181,18 @@ void DwarfDebug::emitDebugInfo() {
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
DIE *Die = TheCU->getCUDie();
-
+
// Emit the compile units header.
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
TheCU->getID()));
-
+
// Emit size of content not including length itself
unsigned ContentSize = Die->getSize() +
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t) + // Pointer Size (in bytes)
sizeof(int32_t); // FIXME - extra pad for gdb bug.
-
+
Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
Asm->EmitInt32(ContentSize);
Asm->OutStreamer.AddComment("DWARF version number");
@@ -3145,7 +3202,7 @@ void DwarfDebug::emitDebugInfo() {
DwarfAbbrevSectionSym);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getTargetData().getPointerSize());
-
+
emitDIE(Die);
// FIXME - extra padding for gdb bug.
Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
@@ -3194,7 +3251,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Define last address of section.
Asm->OutStreamer.AddComment("Extended Op");
Asm->EmitInt8(0);
-
+
Asm->OutStreamer.AddComment("Op size");
Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
@@ -3231,15 +3288,13 @@ void DwarfDebug::emitDebugLines() {
Asm->getObjFileLowering().getDwarfLineSection());
// Construct the section header.
- CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin");
- Asm->OutStreamer.EmitLabel(CurrentLineSectionSym);
Asm->OutStreamer.AddComment("Length of Source Line Info");
Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
Asm->GetTempSymbol("line_begin"), 4);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin"));
Asm->OutStreamer.AddComment("DWARF version number");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Prolog Length");
Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"),
@@ -3294,7 +3349,7 @@ void DwarfDebug::emitDebugLines() {
const std::string &FN = getSourceFileName(Id.second);
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source");
Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
-
+
Asm->EmitULEB128(Id.first, "Directory #");
Asm->EmitULEB128(0, "Mod date");
Asm->EmitULEB128(0, "File size");
@@ -3338,18 +3393,18 @@ void DwarfDebug::emitDebugLines() {
Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
- Asm->EmitInt8(dwarf::DW_LNE_set_address);
+ Asm->EmitInt8(dwarf::DW_LNE_set_address);
Asm->OutStreamer.AddComment("Location label");
Asm->OutStreamer.EmitSymbolValue(Label,
Asm->getTargetData().getPointerSize(),
0/*AddrSpace*/);
-
+
// If change of source, then switch to the new source.
if (Source != LineInfo.getSourceID()) {
Source = LineInfo.getSourceID();
Asm->OutStreamer.AddComment("DW_LNS_set_file");
- Asm->EmitInt8(dwarf::DW_LNS_set_file);
+ Asm->EmitInt8(dwarf::DW_LNS_set_file);
Asm->EmitULEB128(Source, "New Source");
}
@@ -3457,7 +3512,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
Asm->OutStreamer.EmitLabel(DebugFrameBegin);
Asm->OutStreamer.AddComment("FDE CIE offset");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"),
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"),
DwarfFrameSectionSym);
Asm->OutStreamer.AddComment("FDE initial location");
@@ -3466,8 +3521,8 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
Asm->OutStreamer.EmitSymbolValue(FuncBeginSym,
Asm->getTargetData().getPointerSize(),
0/*AddrSpace*/);
-
-
+
+
Asm->OutStreamer.AddComment("FDE address range");
Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number),
FuncBeginSym, Asm->getTargetData().getPointerSize());
@@ -3487,41 +3542,41 @@ void DwarfDebug::emitDebugPubNames() {
// Start the dwarf pubnames section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfPubNamesSection());
-
+
Asm->OutStreamer.AddComment("Length of Public Names Info");
Asm->EmitLabelDifference(
Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
-
+
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
TheCU->getID()));
-
+
Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
-
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
DwarfInfoSectionSym);
-
+
Asm->OutStreamer.AddComment("Compilation Unit Length");
Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
Asm->GetTempSymbol("info_begin", TheCU->getID()),
4);
-
+
const StringMap<DIE*> &Globals = TheCU->getGlobals();
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
DIE *Entity = GI->second;
-
+
Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
-
+
if (Asm->isVerbose())
Asm->OutStreamer.AddComment("External Name");
Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
}
-
+
Asm->OutStreamer.AddComment("End Mark");
Asm->EmitInt32(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
@@ -3540,37 +3595,37 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->EmitLabelDifference(
Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
-
+
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
TheCU->getID()));
-
+
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
-
+
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
DwarfInfoSectionSym);
-
+
Asm->OutStreamer.AddComment("Compilation Unit Length");
Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
Asm->GetTempSymbol("info_begin", TheCU->getID()),
4);
-
+
const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
DIE * Entity = GI->second;
-
+
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
-
+
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
}
-
+
Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
+ Asm->EmitInt32(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
TheCU->getID()));
}
@@ -3581,26 +3636,26 @@ void DwarfDebug::emitDebugPubTypes() {
void DwarfDebug::emitDebugStr() {
// Check to see if it is worth the effort.
if (StringPool.empty()) return;
-
+
// Start the dwarf str section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfStrSection());
// Get all of the string pool entries and put them in an array by their ID so
// we can sort them.
- SmallVector<std::pair<unsigned,
+ SmallVector<std::pair<unsigned,
StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
-
+
for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
Entries.push_back(std::make_pair(I->second.second, &*I));
-
+
array_pod_sort(Entries.begin(), Entries.end());
-
+
for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
// Emit a label for reference from debug information entries.
Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
-
+
// Emit the string itself.
Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
}
@@ -3618,8 +3673,8 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getTargetData().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
- I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I, ++index) {
DotDebugLocEntry Entry = *I;
if (Entry.isEmpty()) {
@@ -3631,15 +3686,30 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false);
- if (Reg < 32) {
+ if (int Offset = Entry.Loc.getOffset()) {
+ // If the value is at a certain offset from frame register then
+ // use DW_OP_fbreg.
+ unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1;
Asm->OutStreamer.AddComment("Loc expr size");
- Asm->EmitInt16(1);
- Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ Asm->EmitInt16(1 + OffsetSize);
+ Asm->OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_fbreg));
+ Asm->EmitInt8(dwarf::DW_OP_fbreg);
+ Asm->OutStreamer.AddComment("Offset");
+ Asm->EmitSLEB128(Offset);
} else {
- Asm->OutStreamer.AddComment("Loc expr size");
- Asm->EmitInt16(1+MCAsmInfo::getULEB128Size(Reg));
- Asm->EmitInt8(dwarf::DW_OP_regx);
- Asm->EmitULEB128(Reg);
+ if (Reg < 32) {
+ Asm->OutStreamer.AddComment("Loc expr size");
+ Asm->EmitInt16(1);
+ Asm->OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+ Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ Asm->OutStreamer.AddComment("Loc expr size");
+ Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg));
+ Asm->EmitInt8(dwarf::DW_OP_regx);
+ Asm->EmitULEB128(Reg);
+ }
}
}
}
@@ -3661,7 +3731,7 @@ void DwarfDebug::emitDebugRanges() {
Asm->getObjFileLowering().getDwarfRangesSection());
unsigned char Size = Asm->getTargetData().getPointerSize();
for (SmallVector<const MCSymbol *, 8>::iterator
- I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
if (*I)
Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
@@ -3734,7 +3804,7 @@ void DwarfDebug::emitDebugInlineInfo() {
if (LName.empty()) {
Asm->OutStreamer.EmitBytes(Name, 0);
Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
- } else
+ } else
Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
DwarfStrSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 5a281c851748..f0ff3bc71699 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -261,7 +261,6 @@ class DwarfDebug {
MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
MCSymbol *DwarfDebugLocSectionSym;
- MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
DIEInteger *DIEIntegerOne;
@@ -338,11 +337,11 @@ private:
/// addSourceLine - Add location information to specified debug information
/// entry.
- void addSourceLine(DIE *Die, const DIVariable *V);
- void addSourceLine(DIE *Die, const DIGlobalVariable *G);
- void addSourceLine(DIE *Die, const DISubprogram *SP);
- void addSourceLine(DIE *Die, const DIType *Ty);
- void addSourceLine(DIE *Die, const DINameSpace *NS);
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
/// addAddress - Add an address attribute to a die based on the location
/// provided.
@@ -376,6 +375,10 @@ private:
void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
const MachineLocation &Location);
+ /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
+ /// on provided frame index.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI);
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void addToContextOwner(DIE *Die, DIDescriptor Context);
@@ -414,14 +417,11 @@ private:
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
DIE *constructEnumTypeDIE(DIEnumerator ETy);
- /// createGlobalVariableDIE - Create new DIE using GV.
- DIE *createGlobalVariableDIE(const DIGlobalVariable &GV);
-
/// createMemberDIE - Create new member DIE.
- DIE *createMemberDIE(const DIDerivedType &DT);
+ DIE *createMemberDIE(DIDerivedType DT);
/// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
+ DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);
/// getOrCreateDbgScope - Create DbgScope for the scope.
DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -560,12 +560,6 @@ private:
/// construct SubprogramDIE - Construct subprogram DIE.
void constructSubprogramDIE(const MDNode *N);
- // FIXME: This should go away in favor of complex addresses.
- /// Find the type the programmer originally declared the variable to be
- /// and return that type. Obsolete, use GetComplexAddrType instead.
- ///
- DIType getBlockByrefType(DIType Ty, std::string Name);
-
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index c87284083cde..86a368831e0e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -894,7 +894,7 @@ void DwarfException::EndModule() {
if (!shouldEmitMovesModule && !shouldEmitTableModule)
return;
- const std::vector<const Function *> Personalities = MMI->getPersonalities();
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
EmitCIE(Personalities[I], I);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 7f98df0d22ea..cb81aa3c88ce 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -65,7 +65,7 @@ namespace {
public:
static char ID;
explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+ : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Control Flow Optimizer"; }
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ffeff1ee27a6..2ef115dbd205 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
LiveIntervalAnalysis.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
+ LocalStackSlotAllocation.cpp
LowerSubregs.cpp
MachineBasicBlock.cpp
MachineCSE.cpp
@@ -42,10 +43,10 @@ add_llvm_library(LLVMCodeGen
MachineVerifier.cpp
ObjectCodeEmitter.cpp
OcamlGC.cpp
- OptimizeExts.cpp
OptimizePHIs.cpp
PHIElimination.cpp
Passes.cpp
+ PeepholeOptimizer.cpp
PostRAHazardRecognizer.cpp
PostRASchedulerList.cpp
PreAllocSplitting.cpp
@@ -57,6 +58,7 @@ add_llvm_library(LLVMCodeGen
RegAllocPBQP.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
+ RenderMachineFunction.cpp
ScheduleDAG.cpp
ScheduleDAGEmit.cpp
ScheduleDAGInstrs.cpp
@@ -67,6 +69,8 @@ add_llvm_library(LLVMCodeGen
SjLjEHPrepare.cpp
SlotIndexes.cpp
Spiller.cpp
+ SplitKit.cpp
+ Splitter.cpp
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 240a7b94fccf..1b7e08a8b6bb 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -25,8 +25,8 @@
using namespace llvm;
char CalculateSpillWeights::ID = 0;
-static RegisterPass<CalculateSpillWeights> X("calcspillweights",
- "Calculate spill weights");
+INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false);
void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<LiveIntervals>();
@@ -41,108 +41,184 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
<< "********** Function: "
<< fn.getFunction()->getName() << '\n');
- LiveIntervals *lis = &getAnalysis<LiveIntervals>();
- MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>();
- const TargetInstrInfo *tii = fn.getTarget().getInstrInfo();
- MachineRegisterInfo *mri = &fn.getRegInfo();
-
- SmallSet<unsigned, 4> processed;
- for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end();
- mbbi != mbbe; ++mbbi) {
- MachineBasicBlock* mbb = mbbi;
- SlotIndex mbbEnd = lis->getMBBEndIdx(mbb);
- MachineLoop* loop = loopInfo->getLoopFor(mbb);
- unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
- bool isExiting = loop ? loop->isLoopExiting(mbb) : false;
-
- for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end();
- mii != mie; ++mii) {
- const MachineInstr *mi = mii;
- if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue())
- continue;
-
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- const MachineOperand &mopi = mi->getOperand(i);
- if (!mopi.isReg() || mopi.getReg() == 0)
- continue;
- unsigned reg = mopi.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
- continue;
- // Multiple uses of reg by the same instruction. It should not
- // contribute to spill weight again.
- if (!processed.insert(reg))
- continue;
-
- bool hasDef = mopi.isDef();
- bool hasUse = !hasDef;
- for (unsigned j = i+1; j != e; ++j) {
- const MachineOperand &mopj = mi->getOperand(j);
- if (!mopj.isReg() || mopj.getReg() != reg)
- continue;
- hasDef |= mopj.isDef();
- hasUse |= mopj.isUse();
- if (hasDef && hasUse)
- break;
- }
-
- LiveInterval &regInt = lis->getInterval(reg);
- float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth);
- if (hasDef && isExiting) {
- // Looks like this is a loop count variable update.
- SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex();
- const LiveRange *dlr =
- lis->getInterval(reg).getLiveRangeContaining(defIdx);
- if (dlr->end >= mbbEnd)
- weight *= 3.0F;
- }
- regInt.weight += weight;
- }
- processed.clear();
- }
+ LiveIntervals &lis = getAnalysis<LiveIntervals>();
+ VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
+ for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
+ LiveInterval &li = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(li.reg))
+ vrai.CalculateWeightAndHint(li);
+ }
+ return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+ const TargetRegisterInfo &tri,
+ const MachineRegisterInfo &mri) {
+ unsigned sub, hreg, hsub;
+ if (mi->getOperand(0).getReg() == reg) {
+ sub = mi->getOperand(0).getSubReg();
+ hreg = mi->getOperand(1).getReg();
+ hsub = mi->getOperand(1).getSubReg();
+ } else {
+ sub = mi->getOperand(1).getSubReg();
+ hreg = mi->getOperand(0).getReg();
+ hsub = mi->getOperand(0).getSubReg();
}
- for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) {
- LiveInterval &li = *I->second;
- if (TargetRegisterInfo::isVirtualRegister(li.reg)) {
- // If the live interval length is essentially zero, i.e. in every live
- // range the use follows def immediately, it doesn't make sense to spill
- // it and hope it will be easier to allocate for this li.
- if (isZeroLengthInterval(&li)) {
- li.weight = HUGE_VALF;
- continue;
- }
-
- bool isLoad = false;
- SmallVector<LiveInterval*, 4> spillIs;
- if (lis->isReMaterializable(li, spillIs, isLoad)) {
- // If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If none of the defs are
- // loads, then it's potentially very cheap to re-materialize.
- // FIXME: this gets much more complicated once we support non-trivial
- // re-materialization.
- if (isLoad)
- li.weight *= 0.9F;
- else
- li.weight *= 0.5F;
- }
-
- // Slightly prefer live interval that has been assigned a preferred reg.
- std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg);
- if (Hint.first || Hint.second)
- li.weight *= 1.01F;
-
- lis->normalizeSpillWeight(li);
+ if (!hreg)
+ return 0;
+
+ if (TargetRegisterInfo::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : 0;
+
+ const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+}
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+ MachineRegisterInfo &mri = mf_.getRegInfo();
+ const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo();
+ MachineBasicBlock *mbb = 0;
+ MachineLoop *loop = 0;
+ unsigned loopDepth = 0;
+ bool isExiting = false;
+ float totalWeight = 0;
+ SmallPtrSet<MachineInstr*, 8> visited;
+
+ // Find the best physreg hist and the best virtreg hint.
+ float bestPhys = 0, bestVirt = 0;
+ unsigned hintPhys = 0, hintVirt = 0;
+
+ // Don't recompute a target specific hint.
+ bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+ for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+ MachineInstr *mi = I.skipInstruction();) {
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+ if (!visited.insert(mi))
+ continue;
+
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = loops_.getLoopFor(mbb);
+ loopDepth = loop ? loop->getLoopDepth() : 0;
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
+
+ // Get allocation hints from copies.
+ if (noHint || !mi->isCopy())
+ continue;
+ unsigned hint = copyHint(mi, li.reg, tri, mri);
+ if (!hint)
+ continue;
+ float hweight = hint_[hint] += weight;
+ if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+ if (hweight > bestPhys && lis_.isAllocatable(hint))
+ bestPhys = hweight, hintPhys = hint;
+ } else {
+ if (hweight > bestVirt)
+ bestVirt = hweight, hintVirt = hint;
}
}
-
- return false;
+
+ hint_.clear();
+
+ // Always prefer the physreg hint.
+ if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+ mri.setRegAllocationHint(li.reg, 0, hint);
+ // Weakly boost the spill weifght of hinted registers.
+ totalWeight *= 1.01F;
+ }
+
+ // Mark li as unspillable if all live ranges are tiny.
+ if (li.isZeroLength()) {
+ li.markNotSpillable();
+ return;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ bool isLoad = false;
+ SmallVector<LiveInterval*, 4> spillIs;
+ if (lis_.isReMaterializable(li, spillIs, isLoad)) {
+ if (isLoad)
+ totalWeight *= 0.9F;
+ else
+ totalWeight *= 0.5F;
+ }
+
+ li.weight = totalWeight;
+ lis_.normalizeSpillWeight(li);
}
-/// Returns true if the given live interval is zero length.
-bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const {
- for (LiveInterval::Ranges::const_iterator
- i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
- if (i->end.getPrevIndex() > i->start)
- return false;
- return true;
+void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
+ MachineRegisterInfo &mri = mf_.getRegInfo();
+ const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo();
+ const TargetRegisterClass *orc = mri.getRegClass(reg);
+ SmallPtrSet<const TargetRegisterClass*,8> rcs;
+
+ for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg),
+ E = mri.reg_nodbg_end(); I != E; ++I) {
+ // The targets don't have accurate enough regclass descriptions that we can
+ // handle subregs. We need something similar to
+ // TRI::getMatchingSuperRegClass, but returning a super class instead of a
+ // sub class.
+ if (I.getOperand().getSubReg()) {
+ DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n');
+ return;
+ }
+ if (const TargetRegisterClass *rc =
+ I->getDesc().getRegClass(I.getOperandNo(), tri))
+ rcs.insert(rc);
+ }
+
+ // If we found no regclass constraints, just leave reg as is.
+ // In theory, we could inflate to the largest superclass of reg's existing
+ // class, but that might not be legal for the current cpu setting.
+ // This could happen if reg is only used by COPY instructions, so we may need
+ // to improve on this.
+ if (rcs.empty()) {
+ return;
+ }
+
+ // Compute the intersection of all classes in rcs.
+ // This ought to be independent of iteration order, but if the target register
+ // classes don't form a proper algebra, it is possible to get different
+ // results. The solution is to make sure the intersection of any two register
+ // classes is also a register class or the null set.
+ const TargetRegisterClass *rc = 0;
+ for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(),
+ E = rcs.end(); I != E; ++I) {
+ rc = rc ? getCommonSubClass(rc, *I) : *I;
+ assert(rc && "Incompatible regclass constraints found");
+ }
+
+ if (rc == orc)
+ return;
+ DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to "
+ << rc->getName() <<".\n");
+ mri.setRegClass(reg, rc);
}
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index e0e315c6c677..91a9536e7757 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -36,7 +36,7 @@ namespace {
public:
static char ID;
- CodePlacementOpt() : MachineFunctionPass(&ID) {}
+ CodePlacementOpt() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index e3746a985644..335d2d8e9bac 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -32,21 +32,21 @@ CriticalAntiDepBreaker(MachineFunction& MFi) :
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF))
-{
-}
+ AllocatableSet(TRI->getAllocatableSet(MF)),
+ Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+ KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
- // Clear out the register class data.
- std::fill(Classes, array_endof(Classes),
- static_cast<const TargetRegisterClass *>(0));
-
- // Initialize the indices to indicate that no registers are live.
const unsigned BBSize = BB->size();
- for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = static_cast<const TargetRegisterClass *>(0);
+
+ // Initialize the indices to indicate that no registers are live.
KillIndices[i] = ~0u;
DefIndices[i] = BBSize;
}
@@ -65,6 +65,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
+
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
@@ -86,6 +87,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
+
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
@@ -106,6 +108,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
+
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
@@ -134,8 +137,10 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
+
// Mark this register to be non-renamable.
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
// Move the def index to the end of the previous region, to reflect
// that the def could theoretically have been scheduled at the end.
DefIndices[Reg] = InsertPosIndex;
@@ -325,6 +330,8 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
RE = RC->allocation_order_end(MF); R != RE; ++R) {
unsigned NewReg = *R;
+ // Don't consider non-allocatable registers
+ if (!AllocatableSet.test(NewReg)) continue;
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
@@ -433,7 +440,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// fix that remaining critical edge too. This is a little more involved,
// because unlike the most recent register, less recent registers should
// still be considered, though only if no other registers are available.
- unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
// Attempt to break anti-dependence edges on the critical path. Walk the
// instructions from the bottom up, tracking information about liveness
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 540630083bcc..0ed7c35b0f0c 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -46,19 +46,18 @@ class TargetRegisterInfo;
/// corresponding value is null. If the register is live but used in
/// multiple register classes, the corresponding value is -1 casted to a
/// pointer.
- const TargetRegisterClass *
- Classes[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<const TargetRegisterClass*> Classes;
/// RegRegs - Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
/// KillIndices - The index of the most recent kill (proceding bottom-up),
/// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> KillIndices;
/// DefIndices - The index of the most recent complete def (proceding bottom
/// up), or ~0u if the register is live.
- unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> DefIndices;
/// KeepRegs - A set of registers which are live and cannot be changed to
/// break anti-dependencies.
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index d69c995b3e03..318d922adebf 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -36,7 +36,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- DeadMachineInstructionElim() : MachineFunctionPass(&ID) {}
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {}
private:
bool isDead(const MachineInstr *MI) const;
@@ -44,9 +44,8 @@ namespace {
}
char DeadMachineInstructionElim::ID = 0;
-static RegisterPass<DeadMachineInstructionElim>
-Y("dead-mi-elimination",
- "Remove dead machine instructions");
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+ "Remove dead machine instructions", false, false);
FunctionPass *llvm::createDeadMachineInstructionElimPass() {
return new DeadMachineInstructionElim();
@@ -81,9 +80,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
- // Compute a bitvector to represent all non-allocatable physregs.
- BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF);
- NonAllocatableRegs.flip();
+ // Treat reserved registers as always live.
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -92,9 +90,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I) {
MachineBasicBlock *MBB = &*I;
- // Start out assuming that all non-allocatable registers are live
- // out of this block.
- LivePhysRegs = NonAllocatableRegs;
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
if (!MBB->empty() && MBB->back().getDesc().isReturn())
@@ -105,6 +102,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs.set(Reg);
}
+ // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs
+ // are not live across blocks, but some targets (x86) can have flags live
+ // out of a block.
+
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 01b31b420931..550fd3e25fb7 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -25,19 +25,17 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
-STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced");
namespace {
class DwarfEHPrepare : public FunctionPass {
const TargetMachine *TM;
const TargetLowering *TLI;
- bool CompileFast;
// The eh.exception intrinsic.
Function *ExceptionValueIntrinsic;
@@ -54,9 +52,8 @@ namespace {
// _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
- // Dominator info is used when turning stack temporaries into registers.
+ // We both use and preserve dominator info.
DominatorTree *DT;
- DominanceFrontier *DF;
// The function we are running on.
Function *F;
@@ -65,28 +62,14 @@ namespace {
typedef SmallPtrSet<BasicBlock*, 8> BBSet;
BBSet LandingPads;
- // Stack temporary used to hold eh.exception values.
- AllocaInst *ExceptionValueVar;
-
bool NormalizeLandingPads();
bool LowerUnwinds();
bool MoveExceptionValueCalls();
- bool FinishStackTemporaries();
- bool PromoteStackTemporaries();
Instruction *CreateExceptionValueCall(BasicBlock *BB);
- Instruction *CreateValueLoad(BasicBlock *BB);
-
- /// CreateReadOfExceptionValue - Return the result of the eh.exception
- /// intrinsic by calling the intrinsic if in a landing pad, or loading it
- /// from the exception value variable otherwise.
- Instruction *CreateReadOfExceptionValue(BasicBlock *BB) {
- return LandingPads.count(BB) ?
- CreateExceptionValueCall(BB) : CreateValueLoad(BB);
- }
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
- /// use the ".llvm.eh.catch.all.value" call need to convert to using its
+ /// use the "llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
@@ -112,69 +95,19 @@ namespace {
bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
SmallPtrSet<IntrinsicInst*, 8> &SelCalls);
- /// DoMem2RegPromotion - Take an alloca call and promote it from memory to a
- /// register.
- bool DoMem2RegPromotion(Value *V) {
- AllocaInst *AI = dyn_cast<AllocaInst>(V);
- if (!AI || !isAllocaPromotable(AI)) return false;
-
- // Turn the alloca into a register.
- std::vector<AllocaInst*> Allocas(1, AI);
- PromoteMemToReg(Allocas, *DT, *DF);
- return true;
- }
-
- /// PromoteStoreInst - Perform Mem2Reg on a StoreInst.
- bool PromoteStoreInst(StoreInst *SI) {
- if (!SI || !DT || !DF) return false;
- if (DoMem2RegPromotion(SI->getOperand(1)))
- return true;
- return false;
- }
-
- /// PromoteEHPtrStore - Promote the storing of an EH pointer into a
- /// register. This should get rid of the store and subsequent loads.
- bool PromoteEHPtrStore(IntrinsicInst *II) {
- if (!DT || !DF) return false;
-
- bool Changed = false;
- StoreInst *SI;
-
- while (1) {
- SI = 0;
- for (Value::use_iterator
- I = II->use_begin(), E = II->use_end(); I != E; ++I) {
- SI = dyn_cast<StoreInst>(I);
- if (SI) break;
- }
-
- if (!PromoteStoreInst(SI))
- break;
-
- Changed = true;
- }
-
- return Changed;
- }
-
public:
static char ID; // Pass identification, replacement for typeid.
- DwarfEHPrepare(const TargetMachine *tm, bool fast) :
- FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()),
- CompileFast(fast),
+ DwarfEHPrepare(const TargetMachine *tm) :
+ FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
virtual bool runOnFunction(Function &Fn);
- // getAnalysisUsage - We need dominance frontiers for memory promotion.
+ // getAnalysisUsage - We need the dominator tree for handling URoR.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (!CompileFast)
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
- if (!CompileFast)
- AU.addRequired<DominanceFrontier>();
- AU.addPreserved<DominanceFrontier>();
}
const char *getPassName() const {
@@ -186,8 +119,8 @@ namespace {
char DwarfEHPrepare::ID = 0;
-FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) {
- return new DwarfEHPrepare(tm, fast);
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+ return new DwarfEHPrepare(tm);
}
/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
@@ -207,7 +140,7 @@ FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
for (Value::use_iterator
I = SelectorIntrinsic->use_begin(),
E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *II = cast<IntrinsicInst>(I);
+ IntrinsicInst *II = cast<IntrinsicInst>(*I);
if (II->getParent()->getParent() != F)
continue;
@@ -225,13 +158,13 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
for (Value::use_iterator
I = URoR->use_begin(),
E = URoR->use_end(); I != E; ++I) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
URoRInvokes.insert(II);
}
}
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
-/// the ".llvm.eh.catch.all.value" call need to convert to using its
+/// the "llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
if (!EHCatchAllValue) return false;
@@ -247,7 +180,7 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
I = Sels.begin(), E = Sels.end(); I != E; ++I) {
IntrinsicInst *Sel = *I;
- // Index of the ".llvm.eh.catch.all.value" variable.
+ // Index of the "llvm.eh.catch.all.value" variable.
unsigned OpIdx = Sel->getNumArgOperands() - 1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
if (GV != EHCatchAllValue) continue;
@@ -268,10 +201,9 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
SmallPtrSet<PHINode*, 32> SeenPHIs;
bool Changed = false;
- restart:
for (Value::use_iterator
I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
- Instruction *II = dyn_cast<Instruction>(I);
+ Instruction *II = dyn_cast<Instruction>(*I);
if (!II || II->getParent()->getParent() != F) continue;
if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
@@ -282,11 +214,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
URoRInvoke = true;
} else if (CastInst *CI = dyn_cast<CastInst>(II)) {
Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
- if (!PromoteStoreInst(SI)) continue;
- Changed = true;
- SeenPHIs.clear();
- goto restart; // Uses may have changed, restart loop.
} else if (PHINode *PN = dyn_cast<PHINode>(II)) {
if (SeenPHIs.insert(PN))
// Don't process a PHI node more than once.
@@ -304,7 +231,7 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
bool DwarfEHPrepare::HandleURoRInvokes() {
if (!EHCatchAllValue) {
EHCatchAllValue =
- F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value");
+ F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
if (!EHCatchAllValue) return false;
}
@@ -318,10 +245,6 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
FindAllCleanupSelectors(Sels, CatchAllSels);
- if (!DT)
- // We require DominatorTree information.
- return CleanupSelectors(CatchAllSels);
-
if (!URoR) {
URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
if (!URoR) return CleanupSelectors(CatchAllSels);
@@ -338,7 +261,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
for (SmallPtrSet<InvokeInst*, 32>::iterator
UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
const BasicBlock *URoRBB = (*UI)->getParent();
- if (SelBB == URoRBB || DT->dominates(SelBB, URoRBB)) {
+ if (DT->dominates(SelBB, URoRBB)) {
SelsToConvert.insert(*SI);
break;
}
@@ -360,11 +283,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
for (Value::use_iterator
I = ExceptionValueIntrinsic->use_begin(),
E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(I);
+ IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
- Changed |= PromoteEHPtrStore(EHPtr);
-
bool URoRInvoke = false;
SmallPtrSet<IntrinsicInst*, 8> SelCalls;
Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);
@@ -532,11 +453,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() {
// Add a fallthrough from NewBB to the original landing pad.
BranchInst::Create(LPad, NewBB);
- // Now update DominatorTree and DominanceFrontier analysis information.
- if (DT)
- DT->splitBlock(NewBB);
- if (DF)
- DF->splitBlock(NewBB);
+ // Now update DominatorTree analysis information.
+ DT->splitBlock(NewBB);
// Remember the newly constructed landing pad. The original landing pad
// LPad is no longer a landing pad now that all unwind edges have been
@@ -586,7 +504,7 @@ bool DwarfEHPrepare::LowerUnwinds() {
// Create the call...
CallInst *CI = CallInst::Create(RewindFunction,
- CreateReadOfExceptionValue(TI->getParent()),
+ CreateExceptionValueCall(TI->getParent()),
"", TI);
CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
// ...followed by an UnreachableInst.
@@ -602,9 +520,11 @@ bool DwarfEHPrepare::LowerUnwinds() {
}
/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
-/// landing pads by replacing calls outside of landing pads with loads from a
-/// stack temporary. Move eh.exception calls inside landing pads to the start
-/// of the landing pad (optional, but may make things simpler for later passes).
+/// landing pads by replacing calls outside of landing pads with direct use of
+/// a register holding the appropriate value; this requires adding calls inside
+/// all landing pads to initialize the register. Also, move eh.exception calls
+/// inside landing pads to the start of the landing pad (optional, but may make
+/// things simpler for later passes).
bool DwarfEHPrepare::MoveExceptionValueCalls() {
// If the eh.exception intrinsic is not declared in the module then there is
// nothing to do. Speed up compilation by checking for this common case.
@@ -614,61 +534,87 @@ bool DwarfEHPrepare::MoveExceptionValueCalls() {
bool Changed = false;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
- if (CI->getIntrinsicID() == Intrinsic::eh_exception) {
- if (!CI->use_empty()) {
- Value *ExceptionValue = CreateReadOfExceptionValue(BB);
- if (CI == ExceptionValue) {
- // The call was at the start of a landing pad - leave it alone.
- assert(LandingPads.count(BB) &&
- "Created eh.exception call outside landing pad!");
- continue;
- }
- CI->replaceAllUsesWith(ExceptionValue);
- }
- CI->eraseFromParent();
- ++NumExceptionValuesMoved;
- Changed = true;
+ // Move calls to eh.exception that are inside a landing pad to the start of
+ // the landing pad.
+ for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ BasicBlock *LP = *LI;
+ for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception.
+ if (!EI->use_empty()) {
+ // If there is already a call to eh.exception at the start of the
+ // landing pad, then get hold of it; otherwise create such a call.
+ Value *CallAtStart = CreateExceptionValueCall(LP);
+
+ // If the call was at the start of a landing pad then leave it alone.
+ if (EI == CallAtStart)
+ continue;
+ EI->replaceAllUsesWith(CallAtStart);
}
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
}
- return Changed;
-}
-
-/// FinishStackTemporaries - If we introduced a stack variable to hold the
-/// exception value then initialize it in each landing pad.
-bool DwarfEHPrepare::FinishStackTemporaries() {
- if (!ExceptionValueVar)
- // Nothing to do.
- return false;
+ // Look for calls to eh.exception that are not in a landing pad. If one is
+ // found, then a register that holds the exception value will be created in
+ // each landing pad, and the SSAUpdater will be used to compute the values
+ // returned by eh.exception calls outside of landing pads.
+ SSAUpdater SSA;
+
+ // Remember where we found the eh.exception call, to avoid rescanning earlier
+ // basic blocks which we already know contain no eh.exception calls.
+ bool FoundCallOutsideLandingPad = false;
+ Function::iterator BB = F->begin();
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
- bool Changed = false;
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II)
+ if (isa<EHExceptionInst>(II)) {
+ SSA.Initialize(II->getType(), II->getName());
+ FoundCallOutsideLandingPad = true;
+ break;
+ }
- // Make sure that there is a store of the exception value at the start of
- // each landing pad.
- for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI) {
- Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI);
- Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar);
- Store->insertAfter(ExceptionValue);
- Changed = true;
+ if (FoundCallOutsideLandingPad)
+ break;
}
- return Changed;
-}
+ // If all calls to eh.exception are in landing pads then we are done.
+ if (!FoundCallOutsideLandingPad)
+ return Changed;
-/// PromoteStackTemporaries - Turn any stack temporaries we introduced into
-/// registers if possible.
-bool DwarfEHPrepare::PromoteStackTemporaries() {
- if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
- // Turn the exception temporary into registers and phi nodes if possible.
- std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
- PromoteMemToReg(Allocas, *DT, *DF);
- return true;
+ // Add a call to eh.exception at the start of each landing pad, and tell the
+ // SSAUpdater that this is the value produced by the landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI)
+ SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
+
+ // Now turn all calls to eh.exception that are not in a landing pad into a use
+ // of the appropriate register.
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception, replace it with the value from any
+ // upstream landing pad(s).
+ EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ }
}
- return false;
+
+ return true;
}
/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
@@ -691,36 +637,11 @@ Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
}
-/// CreateValueLoad - Insert a load of the exception value stack variable
-/// (creating it if necessary) at the start of the basic block (unless
-/// there already is a load, in which case the existing load is returned).
-Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) {
- Instruction *Start = BB->getFirstNonPHIOrDbg();
- // Is this a load of the exception temporary?
- if (ExceptionValueVar)
- if (LoadInst* LI = dyn_cast<LoadInst>(Start))
- if (LI->getPointerOperand() == ExceptionValueVar)
- // Reuse the existing load.
- return Start;
-
- // Create the temporary if we didn't already.
- if (!ExceptionValueVar) {
- ExceptionValueVar = new AllocaInst(PointerType::getUnqual(
- Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin());
- ++NumStackTempsIntroduced;
- }
-
- // Load the value.
- return new LoadInst(ExceptionValueVar, "eh.value.load", Start);
-}
-
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
bool Changed = false;
// Initialize internal state.
- DT = getAnalysisIfAvailable<DominatorTree>();
- DF = getAnalysisIfAvailable<DominanceFrontier>();
- ExceptionValueVar = 0;
+ DT = &getAnalysis<DominatorTree>();
F = &Fn;
// Ensure that only unwind edges end at landing pads (a landing pad is a
@@ -735,13 +656,6 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) {
// Move eh.exception calls to landing pads.
Changed |= MoveExceptionValueCalls();
- // Initialize any stack temporaries we introduced.
- Changed |= FinishStackTemporaries();
-
- // Turn any stack temporaries into registers if possible.
- if (!CompileFast)
- Changed |= PromoteStackTemporaries();
-
Changed |= HandleURoRInvokes();
LandingPads.clear();
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index cb5a8c0eae1d..fb884c9e8b71 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -22,36 +22,12 @@
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Support/ELF.h"
#include "llvm/System/DataTypes.h"
namespace llvm {
class GlobalValue;
- // Identification Indexes
- enum {
- EI_MAG0 = 0,
- EI_MAG1 = 1,
- EI_MAG2 = 2,
- EI_MAG3 = 3
- };
-
- // File types
- enum {
- ET_NONE = 0, // No file type
- ET_REL = 1, // Relocatable file
- ET_EXEC = 2, // Executable file
- ET_DYN = 3, // Shared object file
- ET_CORE = 4, // Core file
- ET_LOPROC = 0xff00, // Beginning of processor-specific codes
- ET_HIPROC = 0xffff // Processor-specific
- };
-
- // Versioning
- enum {
- EV_NONE = 0,
- EV_CURRENT = 1
- };
-
/// ELFSym - This struct contains information about each symbol that is
/// added to logical symbol table for the module. This is eventually
/// turned into a real symbol table in the file.
@@ -108,9 +84,9 @@ namespace llvm {
static ELFSym *getExtSym(const char *Ext) {
ELFSym *Sym = new ELFSym();
Sym->Source.Ext = Ext;
- Sym->setBind(STB_GLOBAL);
- Sym->setType(STT_NOTYPE);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setBind(ELF::STB_GLOBAL);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SourceType = isExtSym;
return Sym;
}
@@ -118,9 +94,9 @@ namespace llvm {
// getSectionSym - Returns a elf symbol to represent an elf section
static ELFSym *getSectionSym() {
ELFSym *Sym = new ELFSym();
- Sym->setBind(STB_LOCAL);
- Sym->setType(STT_SECTION);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_SECTION);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SourceType = isOther;
return Sym;
}
@@ -128,9 +104,9 @@ namespace llvm {
// getFileSym - Returns a elf symbol to represent the module identifier
static ELFSym *getFileSym() {
ELFSym *Sym = new ELFSym();
- Sym->setBind(STB_LOCAL);
- Sym->setType(STT_FILE);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_FILE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
Sym->SourceType = isOther;
return Sym;
@@ -141,8 +117,8 @@ namespace llvm {
ELFSym *Sym = new ELFSym();
Sym->Source.GV = GV;
Sym->setBind(Bind);
- Sym->setType(STT_NOTYPE);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
Sym->SourceType = isGV;
return Sym;
@@ -159,35 +135,14 @@ namespace llvm {
// Symbol index into the Symbol table
unsigned SymTabIdx;
- enum {
- STB_LOCAL = 0, // Local sym, not visible outside obj file containing def
- STB_GLOBAL = 1, // Global sym, visible to all object files being combined
- STB_WEAK = 2 // Weak symbol, like global but lower-precedence
- };
-
- enum {
- STT_NOTYPE = 0, // Symbol's type is not specified
- STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.)
- STT_FUNC = 2, // Symbol is executable code (function, etc.)
- STT_SECTION = 3, // Symbol refers to a section
- STT_FILE = 4 // Local, absolute symbol that refers to a file
- };
-
- enum {
- STV_DEFAULT = 0, // Visibility is specified by binding type
- STV_INTERNAL = 1, // Defined by processor supplements
- STV_HIDDEN = 2, // Not visible to other components
- STV_PROTECTED = 3 // Visible in other components but not preemptable
- };
-
ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
- Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0),
+ Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
SymTabIdx(0) {}
unsigned getBind() const { return (Info >> 4) & 0xf; }
unsigned getType() const { return Info & 0xf; }
- bool isLocalBind() const { return getBind() == STB_LOCAL; }
- bool isFileType() const { return getType() == STT_FILE; }
+ bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
+ bool isFileType() const { return getType() == ELF::STT_FILE; }
void setBind(unsigned X) {
assert(X == (X & 0xF) && "Bind value out of range!");
@@ -222,51 +177,6 @@ namespace llvm {
unsigned Align; // sh_addralign - Alignment of section.
unsigned EntSize; // sh_entsize - Size of entries in the section e
- // Section Header Flags
- enum {
- SHF_WRITE = 1 << 0, // Writable
- SHF_ALLOC = 1 << 1, // Mapped into the process addr space
- SHF_EXECINSTR = 1 << 2, // Executable
- SHF_MERGE = 1 << 4, // Might be merged if equal
- SHF_STRINGS = 1 << 5, // Contains null-terminated strings
- SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
- SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
- SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
- SHF_GROUP = 1 << 9, // Section is a member of a group
- SHF_TLS = 1 << 10 // Section holds thread-local data
- };
-
- // Section Types
- enum {
- SHT_NULL = 0, // No associated section (inactive entry).
- SHT_PROGBITS = 1, // Program-defined contents.
- SHT_SYMTAB = 2, // Symbol table.
- SHT_STRTAB = 3, // String table.
- SHT_RELA = 4, // Relocation entries; explicit addends.
- SHT_HASH = 5, // Symbol hash table.
- SHT_DYNAMIC = 6, // Information for dynamic linking.
- SHT_NOTE = 7, // Information about the file.
- SHT_NOBITS = 8, // Data occupies no space in the file.
- SHT_REL = 9, // Relocation entries; no explicit addends.
- SHT_SHLIB = 10, // Reserved.
- SHT_DYNSYM = 11, // Symbol table.
- SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type.
- SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
- SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
- SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
- };
-
- // Special section indices.
- enum {
- SHN_UNDEF = 0, // Undefined, missing, irrelevant
- SHN_LORESERVE = 0xff00, // Lowest reserved index
- SHN_LOPROC = 0xff00, // Lowest processor-specific index
- SHN_HIPROC = 0xff1f, // Highest processor-specific index
- SHN_ABS = 0xfff1, // Symbol has absolute value; no relocation
- SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables
- SHN_HIRESERVE = 0xffff // Highest reserved index
- };
-
/// SectionIdx - The number of the section in the Section Table.
unsigned short SectionIdx;
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 36b0e6514b3a..3fb087c5ea8b 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -71,7 +71,7 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) {
bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
// Add a symbol to represent the function.
const Function *F = MF.getFunction();
- ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC,
+ ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
EW.getGlobalELFVisibility(F));
FnSym->SectionIdx = ES->SectionIdx;
FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index b644ebeb4be5..d14728d8a36c 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -63,7 +63,7 @@ char ELFWriter::ID = 0;
//===----------------------------------------------------------------------===//
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
- : MachineFunctionPass(&ID), O(o), TM(tm),
+ : MachineFunctionPass(ID), O(o), TM(tm),
OutContext(*new MCContext(*TM.getMCAsmInfo())),
TLOF(TM.getTargetLowering()->getObjFileLowering()),
is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
@@ -129,12 +129,12 @@ bool ELFWriter::doInitialization(Module &M) {
ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
- ElfHdr.emitByte(EV_CURRENT); // e_ident[EI_VERSION]
+ ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
- ElfHdr.emitWord16(ET_REL); // e_type
+ ElfHdr.emitWord16(ELF::ET_REL); // e_type
ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
- ElfHdr.emitWord32(EV_CURRENT); // e_version
+ ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
ElfHdr.emitWord(0); // e_entry, no entry point in .o file
ElfHdr.emitWord(0); // e_phoff, no program header for .o
ELFHdr_e_shoff_Offset = ElfHdr.size();
@@ -252,7 +252,7 @@ ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
// is true if the relocation section contains entries with addends.
ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
unsigned SectionType = TEW->hasRelocationAddend() ?
- ELFSection::SHT_RELA : ELFSection::SHT_REL;
+ ELF::SHT_RELA : ELF::SHT_REL;
std::string SectionName(".rel");
if (TEW->hasRelocationAddend())
@@ -268,11 +268,11 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
default:
llvm_unreachable("unknown visibility type");
case GlobalValue::DefaultVisibility:
- return ELFSym::STV_DEFAULT;
+ return ELF::STV_DEFAULT;
case GlobalValue::HiddenVisibility:
- return ELFSym::STV_HIDDEN;
+ return ELF::STV_HIDDEN;
case GlobalValue::ProtectedVisibility:
- return ELFSym::STV_PROTECTED;
+ return ELF::STV_PROTECTED;
}
return 0;
}
@@ -280,23 +280,23 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
// getGlobalELFBinding - Returns the ELF specific binding type
unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
if (GV->hasInternalLinkage())
- return ELFSym::STB_LOCAL;
+ return ELF::STB_LOCAL;
if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
- return ELFSym::STB_WEAK;
+ return ELF::STB_WEAK;
- return ELFSym::STB_GLOBAL;
+ return ELF::STB_GLOBAL;
}
// getGlobalELFType - Returns the ELF specific type for a global
unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
if (GV->isDeclaration())
- return ELFSym::STT_NOTYPE;
+ return ELF::STT_NOTYPE;
if (isa<Function>(GV))
- return ELFSym::STT_FUNC;
+ return ELF::STT_FUNC;
- return ELFSym::STT_OBJECT;
+ return ELF::STT_OBJECT;
}
// IsELFUndefSym - True if the global value must be marked as a symbol
@@ -364,7 +364,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) {
GblSym->Size = Size;
if (S->HasCommonSymbols()) { // Symbol must go to a common section
- GblSym->SectionIdx = ELFSection::SHN_COMMON;
+ GblSym->SectionIdx = ELF::SHN_COMMON;
// A new linkonce section is created for each global in the
// common section, the default alignment is 1 and the symbol
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index db66ecc6dd83..b8bac5598ecf 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -39,6 +39,7 @@ namespace llvm {
class raw_ostream;
class SectionKind;
class MCContext;
+ class TargetMachine;
typedef std::vector<ELFSym*>::iterator ELFSymIter;
typedef std::vector<ELFSection*>::iterator ELFSectionIter;
@@ -160,29 +161,29 @@ namespace llvm {
SN->SectionIdx = NumSections++;
SN->Type = Type;
SN->Flags = Flags;
- SN->Link = ELFSection::SHN_UNDEF;
+ SN->Link = ELF::SHN_UNDEF;
SN->Align = Align;
return *SN;
}
ELFSection &getNonExecStackSection() {
- return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1);
+ return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
}
ELFSection &getSymbolTableSection() {
- return getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+ return getSection(".symtab", ELF::SHT_SYMTAB, 0);
}
ELFSection &getStringTableSection() {
- return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1);
+ return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
}
ELFSection &getSectionHeaderStringTableSection() {
- return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1);
+ return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
}
ELFSection &getNullSection() {
- return getSection("", ELFSection::SHT_NULL, 0);
+ return getSection("", ELF::SHT_NULL, 0);
}
ELFSection &getDataSection();
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index ab0a80022531..0f6e882a7be4 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -30,8 +30,8 @@ namespace {
raw_ostream &OS;
public:
- Printer() : FunctionPass(&ID), OS(errs()) {}
- explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {}
+ Printer() : FunctionPass(ID), OS(errs()) {}
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
const char *getPassName() const;
@@ -55,8 +55,8 @@ namespace {
}
-static RegisterPass<GCModuleInfo>
-X("collector-metadata", "Create Garbage Collector Module Metadata");
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false);
// -----------------------------------------------------------------------------
@@ -70,7 +70,7 @@ GCFunctionInfo::~GCFunctionInfo() {}
char GCModuleInfo::ID = 0;
GCModuleInfo::GCModuleInfo()
- : ImmutablePass(&ID) {}
+ : ImmutablePass(ID) {}
GCModuleInfo::~GCModuleInfo() {
clear();
@@ -189,7 +189,7 @@ FunctionPass *llvm::createGCInfoDeleter() {
return new Deleter();
}
-Deleter::Deleter() : FunctionPass(&ID) {}
+Deleter::Deleter() : FunctionPass(ID) {}
const char *Deleter::getPassName() const {
return "Delete Garbage Collector Information";
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 71506cc6abb9..719fa194d8da 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -130,7 +130,7 @@ FunctionPass *llvm::createGCLoweringPass() {
char LowerIntrinsics::ID = 0;
LowerIntrinsics::LowerIntrinsics()
- : FunctionPass(&ID) {}
+ : FunctionPass(ID) {}
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
@@ -260,7 +260,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
bool LowerRd = !S.customReadBarrier();
bool InitRoots = S.initializeRoots();
- SmallVector<AllocaInst*,32> Roots;
+ SmallVector<AllocaInst*, 32> Roots;
bool MadeChange = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -271,7 +271,8 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
case Intrinsic::gcwrite:
if (LowerWr) {
// Replace a write barrier with a simple store.
- Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
CI->replaceAllUsesWith(St);
CI->eraseFromParent();
}
@@ -317,7 +318,7 @@ FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
char MachineCodeAnalysis::ID = 0;
MachineCodeAnalysis::MachineCodeAnalysis()
- : MachineFunctionPass(&ID) {}
+ : MachineFunctionPass(ID) {}
const char *MachineCodeAnalysis::getPassName() const {
return "Analyze Machine Code For Garbage Collection";
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 6b445e0b8e0f..0ea30d7a7929 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -154,7 +154,7 @@ namespace {
int FnNum;
public:
static char ID;
- IfConverter() : MachineFunctionPass(&ID), FnNum(-1) {}
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "If Converter"; }
@@ -230,8 +230,7 @@ namespace {
char IfConverter::ID = 0;
}
-static RegisterPass<IfConverter>
-X("if-converter", "If Converter");
+INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false);
FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 12adcaa3a22e..b965bfdcf3b8 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,10 +14,12 @@
#define DEBUG_TYPE "spiller"
#include "Spiller.h"
+#include "SplitKit.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -28,8 +30,10 @@ using namespace llvm;
namespace {
class InlineSpiller : public Spiller {
+ MachineFunctionPass &pass_;
MachineFunction &mf_;
LiveIntervals &lis_;
+ MachineLoopInfo &loops_;
VirtRegMap &vrm_;
MachineFrameInfo &mfi_;
MachineRegisterInfo &mri_;
@@ -37,9 +41,11 @@ class InlineSpiller : public Spiller {
const TargetRegisterInfo &tri_;
const BitVector reserved_;
+ SplitAnalysis splitAnalysis_;
+
// Variables that are valid during spill(), but used by multiple methods.
LiveInterval *li_;
- std::vector<LiveInterval*> *newIntervals_;
+ SmallVectorImpl<LiveInterval*> *newIntervals_;
const TargetRegisterClass *rc_;
int stackSlot_;
const SmallVectorImpl<LiveInterval*> *spillIs_;
@@ -53,25 +59,34 @@ class InlineSpiller : public Spiller {
~InlineSpiller() {}
public:
- InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
- : mf_(*mf), lis_(*lis), vrm_(*vrm),
- mfi_(*mf->getFrameInfo()),
- mri_(mf->getRegInfo()),
- tii_(*mf->getTarget().getInstrInfo()),
- tri_(*mf->getTarget().getRegisterInfo()),
- reserved_(tri_.getReservedRegs(mf_)) {}
+ InlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm)
+ : pass_(pass),
+ mf_(mf),
+ lis_(pass.getAnalysis<LiveIntervals>()),
+ loops_(pass.getAnalysis<MachineLoopInfo>()),
+ vrm_(vrm),
+ mfi_(*mf.getFrameInfo()),
+ mri_(mf.getRegInfo()),
+ tii_(*mf.getTarget().getInstrInfo()),
+ tri_(*mf.getTarget().getRegisterInfo()),
+ reserved_(tri_.getReservedRegs(mf_)),
+ splitAnalysis_(mf, lis_, loops_) {}
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex);
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs);
private:
+ bool split();
+
bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
SlotIndex UseIdx);
bool reMaterializeFor(MachineBasicBlock::iterator MI);
void reMaterializeAll();
+ bool coalesceStackAccess(MachineInstr *MI);
bool foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops);
void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
@@ -80,12 +95,43 @@ private:
}
namespace llvm {
-Spiller *createInlineSpiller(MachineFunction *mf,
- LiveIntervals *lis,
- const MachineLoopInfo *mli,
- VirtRegMap *vrm) {
- return new InlineSpiller(mf, lis, vrm);
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
+}
}
+
+/// split - try splitting the current interval into pieces that may allocate
+/// separately. Return true if successful.
+bool InlineSpiller::split() {
+ splitAnalysis_.analyze(li_);
+
+ if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) {
+ // We can split, but li_ may be left intact with fewer uses.
+ if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
+ .splitAroundLoop(loop))
+ return true;
+ }
+
+ // Try splitting into single block intervals.
+ SplitAnalysis::BlockPtrSet blocks;
+ if (splitAnalysis_.getMultiUseBlocks(blocks)) {
+ if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
+ .splitSingleBlocks(blocks))
+ return true;
+ }
+
+ // Try splitting inside a basic block.
+ if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) {
+ if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
+ .splitInsideBlock(MBB))
+ return true;
+ }
+
+ // We may have been able to split out some uses, but the original interval is
+ // intact, and it should still be spilled.
+ return false;
}
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
@@ -237,7 +283,7 @@ void InlineSpiller::reMaterializeAll() {
lis_.RemoveMachineInstrFromMaps(DefMI);
vrm_.RemoveMachineInstrFromMaps(DefMI);
DefMI->eraseFromParent();
- li_->removeValNo(VNI);
+ VNI->setIsDefAccurate(false);
anyRemoved = true;
}
@@ -253,8 +299,8 @@ void InlineSpiller::reMaterializeAll() {
MachineBasicBlock::iterator NextMI = MI;
++NextMI;
if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
- SlotIndex NearIdx = lis_.getInstructionIndex(NextMI);
- if (li_->liveAt(NearIdx))
+ VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
+ if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
continue;
}
DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
@@ -262,6 +308,24 @@ void InlineSpiller::reMaterializeAll() {
}
}
+/// If MI is a load or store of stackSlot_, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
+ int FI = 0;
+ unsigned reg;
+ if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) &&
+ !(reg = tii_.isStoreToStackSlot(MI, FI)))
+ return false;
+
+ // We have a stack access. Is it the right register and slot?
+ if (reg != li_->reg || FI != stackSlot_)
+ return false;
+
+ DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ lis_.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ return true;
+}
+
/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
/// Return true on success, and MI will be erased.
bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
@@ -323,9 +387,8 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI,
}
void InlineSpiller::spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) {
DEBUG(dbgs() << "Inline spilling " << *li << "\n");
assert(li->isSpillable() && "Attempting to spill already spilled value.");
assert(!li->isStackSlot() && "Trying to spill a stack slot.");
@@ -335,13 +398,18 @@ void InlineSpiller::spill(LiveInterval *li,
rc_ = mri_.getRegClass(li->reg);
spillIs_ = &spillIs;
+ if (split())
+ return;
+
reMaterializeAll();
// Remat may handle everything.
if (li_->empty())
return;
- stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
+ stackSlot_ = vrm_.getStackSlot(li->reg);
+ if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
+ stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
// Iterate over instructions using register.
for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
@@ -365,6 +433,10 @@ void InlineSpiller::spill(LiveInterval *li,
continue;
}
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(MI))
+ continue;
+
// Analyze instruction.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 03ae214ae7da..3852ebaf6425 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -481,7 +481,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
Value *Ops[3];
Ops[0] = CI->getArgOperand(0);
// Extend the amount to i32.
- Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context),
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
/* isSigned */ false);
Ops[2] = Size;
ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index bf3137e49536..36038027b259 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -85,7 +85,7 @@ static bool getVerboseAsm() {
case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
case cl::BOU_TRUE: return true;
case cl::BOU_FALSE: return false;
- }
+ }
}
// Enable or disable FastISel. Both options are needed, because
@@ -139,8 +139,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
// Create a code emitter if asked to show the encoding.
- //
- // FIXME: These are currently leaked.
MCCodeEmitter *MCE = 0;
if (ShowMCEncoding)
MCE = getTarget().createCodeEmitter(*this, *Context);
@@ -154,8 +152,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- //
- // FIXME: These are currently leaked.
MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
if (MCE == 0 || TAB == 0)
@@ -180,12 +176,12 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
if (Printer == 0)
return true;
-
+
// If successful, createAsmPrinter took ownership of AsmStreamer.
AsmStreamer.take();
-
+
PM.add(Printer);
-
+
// Make sure the code model is set.
setCodeModelForStatic();
PM.add(createGCInfoDeleter());
@@ -204,7 +200,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
bool DisableVerify) {
// Make sure the code model is set.
setCodeModelForJIT();
-
+
// Add common CodeGen passes.
MCContext *Ctx = 0;
if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
@@ -216,19 +212,36 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
return false; // success!
}
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer which can be
+/// used to build custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ return true;
+ // Make sure the code model is set.
+ setCodeModelForJIT();
+
+ return false; // success!
+}
+
static void printNoVerify(PassManagerBase &PM, const char *Banner) {
if (PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
}
static void printAndVerify(PassManagerBase &PM,
- const char *Banner,
- bool allowDoubleDefs = false) {
+ const char *Banner) {
if (PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
- PM.add(createMachineVerifierPass(allowDoubleDefs));
+ PM.add(createMachineVerifierPass());
}
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
@@ -258,6 +271,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
// Turn exception handling constructs into something the code generators can
// handle.
switch (getMCAsmInfo()->getExceptionHandlingType()) {
@@ -269,26 +287,25 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
PM.add(createSjLjEHPass(getTargetLowering()));
- PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
- break;
+ // FALLTHROUGH
case ExceptionHandling::Dwarf:
- PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
+ PM.add(createDwarfEHPass(this));
break;
case ExceptionHandling::None:
PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
break;
}
- PM.add(createGCLoweringPass());
-
- // Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
-
if (OptLevel != CodeGenOpt::None && !DisableCGP)
PM.add(createCodeGenPreparePass(getTargetLowering()));
PM.add(createStackProtectorPass(getTargetLowering()));
+ addPreISel(PM, OptLevel);
+
if (PrintISelInput)
PM.add(createPrintFunctionPass("\n\n"
"*** Final LLVM Code input to ISel ***\n",
@@ -300,13 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createVerifierPass());
// Standard Lower-Level Passes.
-
+
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
PM.add(MMI);
OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
// Set up a MachineFunction for the rest of CodeGen to work on.
PM.add(new MachineFunctionAnalysis(*this, OptLevel));
@@ -321,44 +337,43 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
return true;
// Print the instruction selected machine code...
- printAndVerify(PM, "After Instruction Selection",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After Instruction Selection");
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
if (OptLevel != CodeGenOpt::None)
PM.add(createOptimizePHIsPass());
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
if (OptLevel != CodeGenOpt::None) {
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After codegen DCE pass");
- PM.add(createOptimizeExtsPass());
+ PM.add(createPeepholeOptimizerPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
- printAndVerify(PM, "After Machine LICM, CSE and Sinking passes",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
}
// Pre-ra tail duplication.
if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
}
// Run pre-ra passes.
if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PreRegAlloc passes",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After PreRegAlloc passes");
// Perform register allocation.
PM.add(createRegisterAllocator(OptLevel));
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ad5728458062..59f380ad2641 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -166,6 +166,56 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
return I != begin() && (--I)->end > Start;
}
+
+/// ValNo is dead, remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it), otherwise mark it as ~1U so
+/// it can be nuked later.
+void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->setIsUnused(true);
+ }
+}
+
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ bool seenPHIDef = false;
+ valnos.clear();
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ VNInfo *VNI = I->valno;
+ if (!Seen.insert(VNI))
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live range");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ VNI->setHasPHIKill(false);
+ if (VNI->isPHIDef())
+ seenPHIDef = true;
+ }
+
+ // Recompute phi kill flags.
+ if (!seenPHIDef)
+ return;
+ for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (!VNI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def);
+ assert(PHIBB && "No basic block for phi-def");
+ for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(),
+ PE = PHIBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot());
+ if (KVNI)
+ KVNI->setHasPHIKill(true);
+ }
+ }
+}
+
/// extendIntervalEndTo - This method is used when we want to extend the range
/// specified by I to end at the specified endpoint. To do this, we should
/// merge and eliminate all ranges that this will overlap with. The iterator is
@@ -175,7 +225,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
VNInfo *ValNo = I->valno;
// Search for the first interval that we can't merge with.
- Ranges::iterator MergeTo = next(I);
+ Ranges::iterator MergeTo = llvm::next(I);
for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
}
@@ -184,11 +234,11 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
I->end = std::max(NewEnd, prior(MergeTo)->end);
// Erase any dead ranges.
- ranges.erase(next(I), MergeTo);
+ ranges.erase(llvm::next(I), MergeTo);
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
- Ranges::iterator Next = next(I);
+ Ranges::iterator Next = llvm::next(I);
if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
I->end = Next->end;
ranges.erase(Next);
@@ -227,7 +277,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
MergeTo->end = I->end;
}
- ranges.erase(next(MergeTo), next(I));
+ ranges.erase(llvm::next(MergeTo), llvm::next(I));
return MergeTo;
}
@@ -280,7 +330,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
return ranges.insert(it, LR);
}
-/// isInOneLiveRange - Return true if the range specified is entirely in
+/// isInOneLiveRange - Return true if the range specified is entirely in
/// a single LiveRange of the live interval.
bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
@@ -314,16 +364,8 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
break;
}
if (isDead) {
- // Now that ValNo is dead, remove it. If it is the largest value
- // number, just nuke it (and any other deleted values neighboring it),
- // otherwise mark it as ~1U so it can be nuked later.
- if (ValNo->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (!valnos.empty() && valnos.back()->isUnused());
- } else {
- ValNo->setIsUnused(true);
- }
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
}
}
@@ -345,7 +387,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
I->end = Start; // Trim the old interval.
// Insert the new one.
- ranges.insert(next(I), LiveRange(End, OldEnd, ValNo));
+ ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
}
/// removeValNo - Remove all the ranges defined by the specified value#.
@@ -359,21 +401,13 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
if (I->valno == ValNo)
ranges.erase(I);
} while (I != E);
- // Now that ValNo is dead, remove it. If it is the largest value
- // number, just nuke it (and any other deleted values neighboring it),
- // otherwise mark it as ~1U so it can be nuked later.
- if (ValNo->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (!valnos.empty() && valnos.back()->isUnused());
- } else {
- ValNo->setIsUnused(true);
- }
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
}
/// getLiveRangeContaining - Return the live range that contains the
/// specified index, or null if there is none.
-LiveInterval::const_iterator
+LiveInterval::const_iterator
LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
const_iterator It = std::upper_bound(begin(), end(), Idx);
if (It != ranges.begin()) {
@@ -385,7 +419,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
return end();
}
-LiveInterval::iterator
+LiveInterval::iterator
LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
iterator It = std::upper_bound(begin(), end(), Idx);
if (It != begin()) {
@@ -393,7 +427,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
if (It->contains(Idx))
return It;
}
-
+
return end();
}
@@ -425,11 +459,11 @@ VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
/// the intervals are not joinable, this aborts.
void LiveInterval::join(LiveInterval &Other,
const int *LHSValNoAssignments,
- const int *RHSValNoAssignments,
+ const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI) {
// Determine if any of our live range values are mapped. This is uncommon, so
- // we want to avoid the interval scan if not.
+ // we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
unsigned NumVals = getNumValNums();
unsigned NumNewVals = NewVNInfo.size();
@@ -449,7 +483,7 @@ void LiveInterval::join(LiveInterval &Other,
++OutIt;
for (iterator I = OutIt, E = end(); I != E; ++I) {
OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
-
+
// If this live range has the same value # as its immediate predecessor,
// and if they are neighbors, remove one LiveRange. This happens when we
// have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
@@ -460,12 +494,12 @@ void LiveInterval::join(LiveInterval &Other,
OutIt->start = I->start;
OutIt->end = I->end;
}
-
+
// Didn't merge, on to the next one.
++OutIt;
}
}
-
+
// If we merge some live ranges, chop off the end.
ranges.erase(OutIt, end());
}
@@ -483,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other,
if (VNI) {
if (NumValNos >= NumVals)
valnos.push_back(VNI);
- else
+ else
valnos[NumValNos] = VNI;
VNI->id = NumValNos++; // Renumber val#.
}
@@ -502,25 +536,13 @@ void LiveInterval::join(LiveInterval &Other,
}
ComputeJoinedWeight(Other);
-
- // Update regalloc hint if currently there isn't one.
- if (TargetRegisterInfo::isVirtualRegister(reg) &&
- TargetRegisterInfo::isVirtualRegister(Other.reg)) {
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg);
- if (Hint.first == 0 && Hint.second == 0) {
- std::pair<unsigned, unsigned> OtherHint =
- MRI->getRegAllocationHint(Other.reg);
- if (OtherHint.first || OtherHint.second)
- MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second);
- }
- }
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
/// interval as the specified value number. The LiveRanges in RHS are
/// allowed to overlap with LiveRanges in the current interval, but only if
/// the overlapping LiveRanges have the specified value number.
-void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
// TODO: Make this more efficient.
iterator InsertPos = begin();
@@ -569,7 +591,7 @@ void LiveInterval::MergeValueInAsValue(
// If this trimmed away the whole range, ignore it.
if (Start == End) continue;
}
-
+
// Map the valno in the other live range to the current live range.
IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
}
@@ -584,18 +606,10 @@ void LiveInterval::MergeValueInAsValue(
if (I->valno == V1) {
isDead = false;
break;
- }
- if (isDead) {
- // Now that V1 is dead, remove it. If it is the largest value number,
- // just nuke it (and any other deleted values neighboring it), otherwise
- // mark it as ~1U so it can be nuked later.
- if (V1->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (!valnos.empty() && valnos.back()->isUnused());
- } else {
- V1->setIsUnused(true);
}
+ if (isDead) {
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
}
}
}
@@ -609,7 +623,7 @@ void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
const LiveInterval &Clobbers,
VNInfo::Allocator &VNInfoAllocator) {
if (Clobbers.empty()) return;
-
+
DenseMap<VNInfo*, VNInfo*> ValNoMaps;
VNInfo *UnusedValNo = 0;
iterator IP = begin();
@@ -679,10 +693,10 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
// for unknown values, use it.
VNInfo *ClobberValNo =
getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
-
+
iterator IP = begin();
IP = std::upper_bound(IP, end(), Start);
-
+
// If the start of this range overlaps with an existing liverange, trim it.
if (IP != begin() && IP[-1].end > Start) {
Start = IP[-1].end;
@@ -695,7 +709,7 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
// If this trimmed away the whole range, ignore it.
if (Start == End) return;
}
-
+
// Insert the clobber interval.
addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
}
@@ -722,7 +736,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
for (iterator I = begin(); I != end(); ) {
iterator LR = I++;
if (LR->valno != V1) continue; // Not a V1 LiveRange.
-
+
// Okay, we found a V1 live range. If it had a previous, touching, V2 live
// range, extend it.
if (LR != begin()) {
@@ -736,11 +750,11 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
LR = Prev;
}
}
-
+
// Okay, now we have a V1 or V2 live range that is maximally merged forward.
// Ensure that it is a V2 live-range.
LR->valno = V2;
-
+
// If we can merge it into later V2 live ranges, do so now. We ignore any
// following V1 live ranges, as they will be merged in subsequent iterations
// of the loop.
@@ -752,18 +766,10 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
}
-
- // Now that V1 is dead, remove it. If it is the largest value number, just
- // nuke it (and any other deleted values neighboring it), otherwise mark it as
- // ~1U so it can be nuked later.
- if (V1->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (valnos.back()->isUnused());
- } else {
- V1->setIsUnused(true);
- }
-
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
return V2;
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 194d03d8dbfb..2726fc337539 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -47,7 +47,7 @@
using namespace llvm;
// Hidden options for help debugging.
-static cl::opt<bool> DisableReMat("disable-rematerialization",
+static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
STATISTIC(numIntervals , "Number of original intervals");
@@ -55,22 +55,24 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions");
STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
-static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+INITIALIZE_PASS(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false);
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<LiveVariables>();
AU.addRequired<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
-
+
if (!StrongPHIElim) {
AU.addPreservedID(PHIEliminationID);
AU.addRequiredID(PHIEliminationID);
}
-
+
AU.addRequiredID(TwoAddressInstructionPassID);
AU.addPreserved<ProcessImplicitDefs>();
AU.addRequired<ProcessImplicitDefs>();
@@ -84,7 +86,7 @@ void LiveIntervals::releaseMemory() {
for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
E = r2iMap_.end(); I != E; ++I)
delete I->second;
-
+
r2iMap_.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
@@ -188,10 +190,6 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
const MachineInstr &MI = *I;
// Allow copies to and from li.reg
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
- if (SrcReg == li.reg || DstReg == li.reg)
- continue;
if (MI.isCopy())
if (MI.getOperand(0).getReg() == li.reg ||
MI.getOperand(1).getReg() == li.reg)
@@ -278,7 +276,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
/// isPartialRedef - Return true if the specified def at the specific index is
/// partially re-defining the specified live interval. A common case of this is
-/// a definition of the sub-register.
+/// a definition of the sub-register.
bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
LiveInterval &interval) {
if (!MO.getSubReg() || MO.isEarlyClobber())
@@ -324,9 +322,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
mi->addRegisterDefined(interval.reg);
MachineInstr *CopyMI = NULL;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isCopyLike() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+ if (mi->isCopyLike()) {
CopyMI = mi;
}
@@ -420,8 +416,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// def-and-use register operand.
// It may also be partial redef like this:
- // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
- // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+ // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+ // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
bool PartReDef = isPartialRedef(MIIdx, MO, interval);
if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
// If this is a two-address definition, then we have already processed
@@ -454,11 +450,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
OldValNo->setCopy(0);
// A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (PartReDef && (mi->isCopyLike() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)))
+ if (PartReDef && mi->isCopyLike())
OldValNo->setCopy(&*mi);
-
+
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
DEBUG(dbgs() << " replace range with " << LR);
@@ -485,12 +479,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *ValNo;
MachineInstr *CopyMI = NULL;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isCopyLike() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (mi->isCopyLike())
CopyMI = mi;
ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
-
+
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
@@ -567,10 +559,10 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
goto exit;
}
}
-
+
baseIndex = baseIndex.getNextIndex();
}
-
+
// The only case we should have a dead physreg here without a killing or
// instruction where we know it's dead is if it is live-in to the function
// and never used. Another possible case is the implicit use of the
@@ -602,9 +594,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
getOrCreateInterval(MO.getReg()));
else if (allocatableRegs_[MO.getReg()]) {
MachineInstr *CopyMI = NULL;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (MI->isCopyLike() ||
- tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (MI->isCopyLike())
CopyMI = MI;
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
getOrCreateInterval(MO.getReg()), CopyMI);
@@ -696,7 +686,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
/// registers. for some ordering of the machine instructions [1,N] a
/// live interval is an interval [i, j) where 1 <= i <= j < N for
/// which a variable is live
-void LiveIntervals::computeIntervals() {
+void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
@@ -723,11 +713,11 @@ void LiveIntervals::computeIntervals() {
handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
true);
}
-
+
// Skip over empty initial indices.
if (getInstructionFromIndex(MIIndex) == 0)
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
-
+
for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
MI != miEnd; ++MI) {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
@@ -746,7 +736,7 @@ void LiveIntervals::computeIntervals() {
else if (MO.isUndef())
UndefUses.push_back(MO.getReg());
}
-
+
// Move to the next instr slot.
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
}
@@ -791,7 +781,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
unsigned Reg = MO.getReg();
if (Reg == 0 || Reg == li.reg)
continue;
-
+
if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
!allocatableRegs_[Reg])
continue;
@@ -810,7 +800,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
/// which reaches the given instruction also reaches the specified use index.
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
SlotIndex UseIdx) const {
- SlotIndex Index = getInstructionIndex(MI);
+ SlotIndex Index = getInstructionIndex(MI);
VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
return UI != li.end() && UI->valno == ValNo;
@@ -915,7 +905,7 @@ static bool FilterFoldedOps(MachineInstr *MI,
}
return false;
}
-
+
/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
/// slot / to reg or any rematerialized load into ith operand of specified
@@ -1035,7 +1025,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
bool LiveIntervals::
rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, SlotIndex index, SlotIndex end,
+ bool TrySplit, SlotIndex index, SlotIndex end,
MachineInstr *MI,
MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
unsigned Slot, int LdSlot,
@@ -1094,7 +1084,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
// keep the src/dst regs pinned.
//
// Keep track of whether we replace a use and/or def so that we can
- // create the spill interval with the appropriate range.
+ // create the spill interval with the appropriate range.
SmallVector<unsigned, 2> Ops;
tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
@@ -1156,7 +1146,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (mopj.isImplicit())
rewriteImplicitOps(li, MI, NewVReg, vrm);
}
-
+
if (CreatedNewVReg) {
if (DefIsReMat) {
vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
@@ -1696,7 +1686,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
Slot = vrm.assignVirt2StackSlot(li.reg);
-
+
// This case only occurs when the prealloc splitter has already assigned
// a stack slot to this vreg.
else
@@ -1753,7 +1743,7 @@ addIntervalsForSpills(const LiveInterval &li,
Ops.push_back(j);
if (MO.isDef())
continue;
- if (isReMat ||
+ if (isReMat ||
(!FoundUse && !alsoFoldARestore(Id, index, VReg,
RestoreMBBs, RestoreIdxes))) {
// MI has two-address uses of the same register. If the use
@@ -1866,7 +1856,6 @@ addIntervalsForSpills(const LiveInterval &li,
for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
LiveInterval *LI = NewLIs[i];
if (!LI->empty()) {
- LI->weight /= SlotIndex::NUM * getApproximateInstructionCount(*LI);
if (!AddedKill.count(LI)) {
LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
SlotIndex LastUseIdx = LR->end.getBaseIndex();
@@ -1899,7 +1888,7 @@ bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
/// getRepresentativeReg - Find the largest super register of the specified
/// physical register.
unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
- // Find the largest super-register that is allocatable.
+ // Find the largest super-register that is allocatable.
unsigned BestReg = Reg;
for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
unsigned SuperReg = *AS;
@@ -2013,7 +2002,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
getMBBEndIdx(startInst->getParent()), VN);
Interval.addRange(LR);
-
+
return LR;
}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 709e2c6d5ca7..b5c385f77239 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -25,7 +25,8 @@
using namespace llvm;
char LiveStacks::ID = 0;
-static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+INITIALIZE_PASS(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false);
void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 41b891d30f23..375307b973a9 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -42,7 +42,8 @@
using namespace llvm;
char LiveVariables::ID = 0;
-static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+INITIALIZE_PASS(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false);
void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -482,21 +483,6 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
}
}
-namespace {
- struct RegSorter {
- const TargetRegisterInfo *TRI;
-
- RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { }
- bool operator()(unsigned A, unsigned B) {
- if (TRI->isSubRegister(A, B))
- return true;
- else if (TRI->isSubRegister(B, A))
- return false;
- return A < B;
- }
- };
-}
-
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MRI = &mf.getRegInfo();
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 000000000000..7e366f0ceec0
--- /dev/null
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,354 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates the are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+ MI(I), LocalOffset(Offset) {}
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const {
+ return "Local Stack Slot Allocation";
+ }
+
+ private:
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+FunctionPass *llvm::createLocalStackSlotAllocationPass() {
+ return new LocalStackSlotPass();
+}
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a bit better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+ std::pair<unsigned, int64_t> &RegOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ unsigned e = Regs.size();
+ for (unsigned i = 0; i < e; ++i) {
+ RegOffset = Regs[i];
+ // Check if the relative offset from the where the base register references
+ // to the target address is in range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
+ if (TRI->isFrameOffsetLegal(MI, Offset))
+ return true;
+ }
+ return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin();
+
+ // Collect all of the instructions in the block that reference
+ // a frame index. Also store the frame index referenced to ease later
+ // lookup. (For any insn that has more than one FI reference, we arbitrarily
+ // choose the first one).
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+ // A base register definition is a register+offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+ // Debug value instructions can't be out of range, so they don't need
+ // any updates.
+ if (MI->isDebugValue())
+ continue;
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ break;
+ FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ break;
+ }
+ }
+ }
+ }
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+
+ // Loop throught the frame references and allocate for them as necessary
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ MachineBasicBlock::iterator I =
+ FrameReferenceInsns[ref].getMachineInstr();
+ MachineInstr *MI = I;
+ for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(idx).isFI()) {
+ int FrameIdx = MI->getOperand(idx).getIndex();
+
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+ if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+ unsigned BaseReg = 0;
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust =
+ StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ std::pair<unsigned, int64_t> RegOffset;
+ if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+ FrameSizeAdjust,
+ LocalOffsets[FrameIdx],
+ MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " <<
+ RegOffset.first << "\n");
+ // We found a register to reuse.
+ BaseReg = RegOffset.first;
+ Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+ RegOffset.second;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ const TargetRegisterClass *RC = TRI->getPointerRegClass();
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " <<
+ LocalOffsets[FrameIdx] + InstrOffset << "\n");
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ TRI->materializeFrameBaseRegister(InsertionPt, BaseReg,
+ FrameIdx, InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+ InstrOffset;
+ BaseRegisters.push_back(
+ std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
+ }
+ }
+ }
+ }
+ return UsedBaseReg;
+}
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index dfd4eaeca660..ad1c537c1911 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -36,7 +36,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {}
+ LowerSubregsInstructionPass() : MachineFunctionPass(ID) {}
const char *getPassName() const {
return "Subregister lowering instruction pass";
@@ -58,9 +58,6 @@ namespace {
void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
const TargetRegisterInfo *TRI);
- void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
- const TargetRegisterInfo *TRI,
- bool AddIfNotFound = false);
void TransferImplicitDefs(MachineInstr *MI);
};
@@ -87,23 +84,6 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
}
}
-/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed,
-/// and the lowered replacement instructions immediately precede it.
-/// Mark the replacement instructions with the kill flag.
-void
-LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
- unsigned SrcReg,
- const TargetRegisterInfo *TRI,
- bool AddIfNotFound) {
- for (MachineBasicBlock::iterator MII =
- prior(MachineBasicBlock::iterator(MI)); ; --MII) {
- if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound))
- break;
- assert(MII != MI->getParent()->begin() &&
- "copyPhysReg output doesn't reference source register!");
- }
-}
-
/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
/// replacement instructions immediately precede it. Copy any implicit-def
/// operands from MI to the replacement instruction.
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index a27ee479433b..50f3f672dced 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -441,7 +441,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
- DEBUG(dbgs() << "PHIElimination splitting critical edge:"
+ DEBUG(dbgs() << "Splitting critical edge:"
" BB#" << getNumber()
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << Succ->getNumber() << '\n');
@@ -468,11 +468,33 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
LV->addNewBlock(NMBB, this, Succ);
if (MachineDominatorTree *MDT =
- P->getAnalysisIfAvailable<MachineDominatorTree>())
- MDT->addNewBlock(NMBB, this);
+ P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+ // Update dominator information.
+ MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ);
+
+ bool IsNewIDom = true;
+ for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+ PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (PredBB == NMBB)
+ continue;
+ if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) {
+ IsNewIDom = false;
+ break;
+ }
+ }
+
+ // We know "this" dominates the newly created basic block.
+ MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom)
+ MDT->changeImmediateDominator(SucccDTNode, NewDTNode);
+ }
- if (MachineLoopInfo *MLI =
- P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
if (MachineLoop *TIL = MLI->getLoopFor(this)) {
// If one or the other blocks were not in a loop, the new block is not
// either, and thus LI doesn't need to be updated.
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 833cc00027db..92e2299ec62f 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -41,7 +41,7 @@ namespace {
MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {}
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -49,6 +49,7 @@ namespace {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
+ AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
}
@@ -85,8 +86,8 @@ namespace {
} // end anonymous namespace
char MachineCSE::ID = 0;
-static RegisterPass<MachineCSE>
-X("machine-cse", "Machine Common Subexpression Elimination");
+INITIALIZE_PASS(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false);
FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
@@ -107,29 +108,9 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (DefMI->getParent() != MBB)
continue;
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- !SrcSubIdx && !DstSubIdx) {
- const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
- if (!NewRC)
- continue;
- DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << *MI);
- MO.setReg(SrcReg);
- MRI->clearKillFlags(SrcReg);
- if (NewRC != SRC)
- MRI->setRegClass(SrcReg, NewRC);
- DefMI->eraseFromParent();
- ++NumCoalesces;
- Changed = true;
- }
-
if (!DefMI->isCopy())
continue;
- SrcReg = DefMI->getOperand(1).getReg();
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
continue;
if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
@@ -261,19 +242,13 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
return false;
}
-static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- return MI->isCopyLike() ||
- TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
-}
-
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
return false;
// Ignore copies.
- if (isCopy(MI, TII))
+ if (MI->isCopyLike())
return false;
// Ignore stuff that we obviously can't move.
@@ -329,7 +304,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
E = MRI->use_nodbg_end(); I != E; ++I) {
MachineInstr *Use = &*I;
// Ignore copies.
- if (!isCopy(Use, TII)) {
+ if (!Use->isCopyLike()) {
HasNonCopyUse = true;
break;
}
@@ -385,7 +360,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Look for trivial copy coalescing opportunities.
if (PerformTrivialCoalescing(MI, MBB)) {
// After coalescing MI itself may become a copy.
- if (isCopy(MI, TII))
+ if (MI->isCopyLike())
continue;
FoundCSE = VNT.count(MI);
}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index b5f8fbba99de..3c674789244a 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -24,10 +24,10 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
char MachineDominatorTree::ID = 0;
-static RegisterPass<MachineDominatorTree>
-E("machinedomtree", "MachineDominator Tree Construction", true);
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true);
-const PassInfo *const llvm::MachineDominatorsID = &E;
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -41,7 +41,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
}
MachineDominatorTree::MachineDominatorTree()
- : MachineFunctionPass(&ID) {
+ : MachineFunctionPass(ID) {
DT = new DominatorTreeBase<MachineBasicBlock>(false);
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 666120f032c6..017170076ceb 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -397,7 +397,6 @@ void MachineFunction::viewCFGOnly() const
/// create a corresponding virtual register for it.
unsigned MachineFunction::addLiveIn(unsigned PReg,
const TargetRegisterClass *RC) {
- assert(RC->contains(PReg) && "Not the correct regclass!");
MachineRegisterInfo &MRI = getRegInfo();
unsigned VReg = MRI.getLiveInVirtReg(PReg);
if (VReg) {
@@ -447,7 +446,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
unsigned StackAlign = TFI.getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
- /*isSS*/false));
+ /*isSS*/false, false));
return -++NumFixedObjects;
}
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 07a0f45c0f48..4f84b952e061 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -20,14 +20,14 @@ using namespace llvm;
// a default constructor.
static PassInfo
X("Machine Function Analysis", "machine-function-analysis",
- intptr_t(&MachineFunctionAnalysis::ID), 0,
+ &MachineFunctionAnalysis::ID, 0,
/*CFGOnly=*/false, /*is_analysis=*/true);
char MachineFunctionAnalysis::ID = 0;
MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
- FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) {
+ FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
}
MachineFunctionAnalysis::~MachineFunctionAnalysis() {
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 547c4febc8da..2aaa798a02c1 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -29,7 +29,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
const std::string Banner;
MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
- : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
const char *getPassName() const { return "MachineFunction Printer"; }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 6b2e98549c71..446e461d5460 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1236,12 +1236,18 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
const MachineFunction *MF = 0;
+ const MachineRegisterInfo *MRI = 0;
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
if (!TM && MF)
TM = &MF->getTarget();
+ if (MF)
+ MRI = &MF->getRegInfo();
}
+ // Save a list of virtual registers.
+ SmallVector<unsigned, 8> VirtRegs;
+
// Print explicitly defined operands on the left of an assignment syntax.
unsigned StartOp = 0, e = getNumOperands();
for (; StartOp < e && getOperand(StartOp).isReg() &&
@@ -1250,6 +1256,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
++StartOp) {
if (StartOp != 0) OS << ", ";
getOperand(StartOp).print(OS, TM);
+ unsigned Reg = getOperand(StartOp).getReg();
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg))
+ VirtRegs.push_back(Reg);
}
if (StartOp != 0)
@@ -1264,6 +1273,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegs.push_back(MO.getReg());
+
// Omit call-clobbered registers which aren't used anywhere. This makes
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
@@ -1325,11 +1338,29 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
i != e; ++i) {
OS << **i;
- if (next(i) != e)
+ if (llvm::next(i) != e)
OS << " ";
}
}
+ // Print the regclass of any virtual registers encountered.
+ if (MRI && !VirtRegs.empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ for (unsigned i = 0; i != VirtRegs.size(); ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
+ OS << " " << RC->getName() << ":%reg" << VirtRegs[i];
+ for (unsigned j = i+1; j != VirtRegs.size();) {
+ if (MRI->getRegClass(VirtRegs[j]) != RC) {
+ ++j;
+ continue;
+ }
+ if (VirtRegs[i] != VirtRegs[j])
+ OS << "," << VirtRegs[j];
+ VirtRegs.erase(VirtRegs.begin()+j);
+ }
+ }
+ }
+
if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 4c054f51f3a8..1a74b747e9f2 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -68,16 +68,16 @@ namespace {
BitVector AllocatableSet;
- // For each opcode, keep a list of potentail CSE instructions.
+ // For each opcode, keep a list of potential CSE instructions.
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
public:
static char ID; // Pass identification, replacement for typeid
MachineLICM() :
- MachineFunctionPass(&ID), PreRegAlloc(true) {}
+ MachineFunctionPass(ID), PreRegAlloc(true) {}
explicit MachineLICM(bool PreRA) :
- MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -189,8 +189,8 @@ namespace {
} // end anonymous namespace
char MachineLICM::ID = 0;
-static RegisterPass<MachineLICM>
-X("machinelicm", "Machine Loop Invariant Code Motion");
+INITIALIZE_PASS(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false);
FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
return new MachineLICM(PreRegAlloc);
@@ -488,9 +488,14 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
MII = NextMII;
}
- const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
- for (unsigned I = 0, E = Children.size(); I != E; ++I)
- HoistRegion(Children[I]);
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() < 25) {
+ const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ HoistRegion(Children[I]);
+ }
}
/// IsLICMCandidate - Returns true if the instruction may be a suitable
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 269538b31d0b..bca4b0c28985 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -30,10 +30,10 @@ TEMPLATE_INSTANTIATION(MLIB);
}
char MachineLoopInfo::ID = 0;
-static RegisterPass<MachineLoopInfo>
-X("machine-loops", "Machine Natural Loop Construction", true);
+INITIALIZE_PASS(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true);
-const PassInfo *const llvm::MachineLoopInfoID = &X;
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
releaseMemory();
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 15778b46fe0a..b647a4dcc530 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -28,8 +28,8 @@ using namespace llvm;
using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use TargetData's.
-static RegisterPass<MachineModuleInfo>
-X("machinemoduleinfo", "Machine Module Information");
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false);
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
@@ -254,7 +254,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
//===----------------------------------------------------------------------===//
MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
-: ImmutablePass(&ID), Context(MAI),
+: ImmutablePass(ID), Context(MAI),
ObjFileMMI(0),
CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){
// Always emit some info, by default "no personality" info.
@@ -264,7 +264,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
}
MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(&ID), Context(*(MCAsmInfo*)0) {
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0) {
assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
"should always be explicitly constructed by LLVMTargetMachine");
abort();
@@ -579,10 +579,3 @@ namespace {
}
};
}
-
-MachineModuleInfo::VariableDbgInfoMapTy &
-MachineModuleInfo::getVariableDbgInfo() {
- std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(),
- VariableDebugSorter());
- return VariableDbgInfo;
-}
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 61334fc1790a..c8f8fafe227e 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -26,11 +26,21 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-STATISTIC(NumSunk, "Number of machine instructions sunk");
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+SplitLimit("split-limit",
+ cl::init(~0u), cl::Hidden);
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
namespace {
class MachineSinking : public MachineFunctionPass {
@@ -44,7 +54,7 @@ namespace {
public:
static char ID; // Pass identification
- MachineSinking() : MachineFunctionPass(&ID) {}
+ MachineSinking() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -59,21 +69,28 @@ namespace {
}
private:
bool ProcessBlock(MachineBasicBlock &MBB);
+ MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From,
+ MachineBasicBlock *To);
bool SinkInstruction(MachineInstr *MI, bool &SawStore);
- bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB, bool &LocalUse) const;
};
} // end anonymous namespace
char MachineSinking::ID = 0;
-static RegisterPass<MachineSinking>
-X("machine-sink", "Machine code sinking");
+INITIALIZE_PASS(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false);
FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
-/// occur in blocks dominated by the specified block.
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
- MachineBasicBlock *MBB) const {
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &LocalUse) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
// Ignoring debug uses is necessary so debug info doesn't affect the code.
@@ -91,6 +108,9 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
}
// Check that it dominates.
@@ -166,6 +186,66 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
return MadeChange;
}
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB) {
+ // Avoid breaking back edge. From == To means backedge for single BB loop.
+ if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB)
+ return 0;
+
+ // Check for more "complex" loops.
+ if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) ||
+ !LI->isLoopHeader(ToBB)) {
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return 0;
+ }
+
+ // FIXME: Determine if it's cost effective to break this edge.
+ return FromBB->SplitCriticalEdge(ToBB, this);
+ }
+
+ return 0;
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
@@ -246,7 +326,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (SuccToSinkTo) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse))
return false;
continue;
@@ -256,10 +337,14 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// we should sink to.
for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
E = ParentBlock->succ_end(); SI != E; ++SI) {
- if (AllUsesDominatedByBlock(Reg, *SI)) {
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) {
SuccToSinkTo = *SI;
break;
}
+ if (LocalUse)
+ // Def is used locally, it's never safe to move this def.
+ return false;
}
// If we couldn't find a block to sink to, ignore this instruction.
@@ -303,27 +388,44 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (SuccToSinkTo->pred_size() > 1) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
+ bool TryBreak = false;
bool store = true;
if (!MI->isSafeToMove(TII, AA, store)) {
- DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
- return false;
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
}
// We don't want to sink across a critical edge if we don't dominate the
// successor. We could be introducing calculations to new code paths.
- if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
- return false;
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
}
// Don't sink instructions into a loop.
- if (LI->isLoopHeader(SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
- return false;
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
}
// Otherwise we are OK with sinking along a critical edge.
- DEBUG(dbgs() << "Sinking along critical edge.\n");
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo);
+ if (!NewSucc) {
+ DEBUG(dbgs() <<
+ " *** PUNTING: Not legal or profitable to break critical edge\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+ }
}
// Determine where to insert into. Skip phi nodes.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 2297c908b1e0..1e88562935ea 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1,4 +1,4 @@
-//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===//
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,6 +24,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -44,19 +45,14 @@ using namespace llvm;
namespace {
struct MachineVerifier {
- MachineVerifier(Pass *pass, bool allowDoubleDefs) :
+ MachineVerifier(Pass *pass) :
PASS(pass),
- allowVirtDoubleDefs(allowDoubleDefs),
- allowPhysDoubleDefs(true),
OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
{}
bool runOnMachineFunction(MachineFunction &MF);
Pass *const PASS;
- const bool allowVirtDoubleDefs;
- const bool allowPhysDoubleDefs;
-
const char *const OutFileName;
raw_ostream *OS;
const MachineFunction *MF;
@@ -91,10 +87,6 @@ namespace {
// defined. Map value is the user.
RegMap vregsLiveIn;
- // Vregs that must be dead in because they are defined without being
- // killed first. Map value is the defining instruction.
- RegMap vregsDeadIn;
-
// Regs killed in MBB. They may be defined again, and will then be in both
// regsKilled and regsLiveOut.
RegSet regsKilled;
@@ -175,6 +167,7 @@ namespace {
// Analysis information if available
LiveVariables *LiveVars;
+ const LiveIntervals *LiveInts;
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
@@ -195,15 +188,14 @@ namespace {
void calcRegsRequired();
void verifyLiveVariables();
+ void verifyLiveIntervals();
};
struct MachineVerifierPass : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
- bool AllowDoubleDefs;
- explicit MachineVerifierPass(bool allowDoubleDefs = false)
- : MachineFunctionPass(&ID),
- AllowDoubleDefs(allowDoubleDefs) {}
+ MachineVerifierPass()
+ : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -211,7 +203,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) {
- MF.verify(this, AllowDoubleDefs);
+ MF.verify(this);
return false;
}
};
@@ -219,17 +211,15 @@ namespace {
}
char MachineVerifierPass::ID = 0;
-static RegisterPass<MachineVerifierPass>
-MachineVer("machineverifier", "Verify generated machine code");
-static const PassInfo *const MachineVerifyID = &MachineVer;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false);
-FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
- return new MachineVerifierPass(allowPhysDoubleDefs);
+FunctionPass *llvm::createMachineVerifierPass() {
+ return new MachineVerifierPass();
}
-void MachineFunction::verify(Pass *p, bool allowDoubleDefs) const {
- MachineVerifier(p, allowDoubleDefs)
- .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+void MachineFunction::verify(Pass *p) const {
+ MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this));
}
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
@@ -255,10 +245,13 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
TRI = TM->getRegisterInfo();
MRI = &MF.getRegInfo();
+ LiveVars = NULL;
+ LiveInts = NULL;
if (PASS) {
- LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
- } else {
- LiveVars = NULL;
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
}
visitMachineFunctionBefore();
@@ -512,6 +505,20 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if ((*I)->isStore() && !TI.mayStore())
report("Missing mayStore flag", MI);
}
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
}
void
@@ -570,15 +577,30 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
} else
isKill = MO->isKill();
- if (isKill) {
+ if (isKill)
addRegWithSubRegs(regsKilled, Reg);
- // Check that LiveVars knows this kill
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg)) {
- LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- if (std::find(VI.Kills.begin(),
- VI.Kills.end(), MI) == VI.Kills.end())
- report("Kill missing from LiveVariables", MO, MONum);
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(),
+ VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // TODO: Verify isKill == LI.killedAt.
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -607,6 +629,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsDead, Reg);
else
addRegWithSubRegs(regsDefined, Reg);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) {
+ assert(LR->valno && "NULL valno is not allowed");
+ if (LR->valno->def != DefIdx) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << LR->valno->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
}
// Check register classes.
@@ -670,40 +714,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
- set_subtract(regsLive, regsKilled);
- regsKilled.clear();
-
- // Verify that both <def> and <def,dead> operands refer to dead registers.
- RegVector defs(regsDefined);
- defs.append(regsDead.begin(), regsDead.end());
-
- for (RegVector::const_iterator I = defs.begin(), E = defs.end();
- I != E; ++I) {
- if (regsLive.count(*I)) {
- if (TargetRegisterInfo::isPhysicalRegister(*I)) {
- if (!allowPhysDoubleDefs && !isReserved(*I) &&
- !regsLiveInButUnused.count(*I)) {
- report("Redefining a live physical register", MI);
- *OS << "Register " << TRI->getName(*I)
- << " was defined but already live.\n";
- }
- } else {
- if (!allowVirtDoubleDefs) {
- report("Redefining a live virtual register", MI);
- *OS << "Virtual register %reg" << *I
- << " was defined but already live.\n";
- }
- }
- } else if (TargetRegisterInfo::isVirtualRegister(*I) &&
- !MInfo.regsKilled.count(*I)) {
- // Virtual register defined without being killed first must be dead on
- // entry.
- MInfo.vregsDeadIn.insert(std::make_pair(*I, MI));
- }
- }
-
- set_subtract(regsLive, regsDead); regsDead.clear();
- set_union(regsLive, regsDefined); regsDefined.clear();
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
}
void
@@ -828,35 +841,15 @@ void MachineVerifier::visitMachineFunctionAfter() {
continue;
checkPHIOps(MFI);
-
- // Verify dead-in virtual registers.
- if (!allowVirtDoubleDefs) {
- for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
- PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
- BBInfo &PrInfo = MBBInfoMap[*PrI];
- if (!PrInfo.reachable)
- continue;
-
- for (RegMap::iterator I = MInfo.vregsDeadIn.begin(),
- E = MInfo.vregsDeadIn.end(); I != E; ++I) {
- // DeadIn register must be in neither regsLiveOut or vregsPassed of
- // any predecessor.
- if (PrInfo.isLiveOut(I->first)) {
- report("Live-in virtual register redefined", I->second);
- *OS << "Register %reg" << I->first
- << " was live-out from predecessor MBB #"
- << (*PrI)->getNumber() << ".\n";
- }
- }
- }
- }
}
- // Now check LiveVariables info if available
- if (LiveVars) {
+ // Now check liveness info if available
+ if (LiveVars || LiveInts)
calcRegsRequired();
+ if (LiveVars)
verifyLiveVariables();
- }
+ if (LiveInts)
+ verifyLiveIntervals();
}
void MachineVerifier::verifyLiveVariables() {
@@ -886,4 +879,55 @@ void MachineVerifier::verifyLiveVariables() {
}
}
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
+ LVE = LiveInts->end(); LVI != LVE; ++LVI) {
+ const LiveInterval &LI = *LVI->second;
+ assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I) {
+ VNInfo *VNI = *I;
+ const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def);
+
+ if (!DefLR) {
+ if (!VNI->isUnused()) {
+ report("Valno not live at def and not marked unused", MF);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (VNI->isUnused())
+ continue;
+
+ if (DefLR->valno != VNI) {
+ report("Live range at def has different valno", MF);
+ DefLR->print(*OS);
+ *OS << " should use valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ }
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
+ const LiveRange &LR = *I;
+ assert(LR.valno && "Live range has no valno");
+
+ if (LR.valno->id >= LI.getNumValNums() ||
+ LR.valno != LI.getValNumInfo(LR.valno->id)) {
+ report("Foreign valno in live range", MF);
+ LR.print(*OS);
+ *OS << " has a valno not in " << LI << '\n';
+ }
+
+ if (LR.valno->isUnused()) {
+ report("Live range valno is marked unused", MF);
+ LR.print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ }
+ }
+}
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
deleted file mode 100644
index dcdc243e5db3..000000000000
--- a/lib/CodeGen/OptimizeExts.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs optimization of sign / zero extension instructions. It
-// may be extended to handle other instructions of similar property.
-//
-// On some targets, some instructions, e.g. X86 sign / zero extension, may
-// leave the source value in the lower part of the result. This pass will
-// replace (some) uses of the pre-extension value with uses of the sub-register
-// of the results.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ext-opt"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
- cl::desc("Aggressive extension optimization"));
-
-STATISTIC(NumReuse, "Number of extension results reused");
-
-namespace {
- class OptimizeExts : public MachineFunctionPass {
- const TargetMachine *TM;
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DT; // Machine dominator tree
-
- public:
- static char ID; // Pass identification
- OptimizeExts() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- if (Aggressive) {
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- }
- }
-
- private:
- bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs);
- };
-}
-
-char OptimizeExts::ID = 0;
-static RegisterPass<OptimizeExts>
-X("opt-exts", "Optimize sign / zero extensions");
-
-FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
-
-/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads
-/// a single register and writes a single register and it does not modify
-/// the source, and if the source value is preserved as a sub-register of
-/// the result, then replace all reachable uses of the source with the subreg
-/// of the result.
-/// Do not generate an EXTRACT that is used only in a debug use, as this
-/// changes the code. Since this code does not currently share EXTRACTs, just
-/// ignore all debug uses.
-bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
- bool Changed = false;
- LocalMIs.insert(MI);
-
- unsigned SrcReg, DstReg, SubIdx;
- if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg))
- return false;
-
- MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
- if (++UI == MRI->use_nodbg_end())
- // No other uses.
- return false;
-
- // Ok, the source has other uses. See if we can replace the other uses
- // with use of the result of the extension.
- SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
- UI = MRI->use_nodbg_begin(DstReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
- UI != UE; ++UI)
- ReachedBBs.insert(UI->getParent());
-
- bool ExtendLife = true;
- // Uses that are in the same BB of uses of the result of the instruction.
- SmallVector<MachineOperand*, 8> Uses;
- // Uses that the result of the instruction can reach.
- SmallVector<MachineOperand*, 8> ExtendedUses;
-
- UI = MRI->use_nodbg_begin(SrcReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
- UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = &*UI;
- if (UseMI == MI)
- continue;
- if (UseMI->isPHI()) {
- ExtendLife = false;
- continue;
- }
-
- // It's an error to translate this:
- //
- // %reg1025 = <sext> %reg1024
- // ...
- // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
- //
- // into this:
- //
- // %reg1025 = <sext> %reg1024
- // ...
- // %reg1027 = COPY %reg1025:4
- // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
- //
- // The problem here is that SUBREG_TO_REG is there to assert that an
- // implicit zext occurs. It doesn't insert a zext instruction. If we allow
- // the COPY here, it will give us the value after the <sext>,
- // not the original value of %reg1024 before <sext>.
- if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
- continue;
-
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (UseMBB == MBB) {
- // Local uses that come after the extension.
- if (!LocalMIs.count(UseMI))
- Uses.push_back(&UseMO);
- } else if (ReachedBBs.count(UseMBB))
- // Non-local uses where the result of extension is used. Always
- // replace these unless it's a PHI.
- Uses.push_back(&UseMO);
- else if (Aggressive && DT->dominates(MBB, UseMBB))
- // We may want to extend live range of the extension result in order
- // to replace these uses.
- ExtendedUses.push_back(&UseMO);
- else {
- // Both will be live out of the def MBB anyway. Don't extend live
- // range of the extension result.
- ExtendLife = false;
- break;
- }
- }
-
- if (ExtendLife && !ExtendedUses.empty())
- // Ok, we'll extend the liveness of the extension result.
- std::copy(ExtendedUses.begin(), ExtendedUses.end(),
- std::back_inserter(Uses));
-
- // Now replace all uses.
- if (!Uses.empty()) {
- SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
- // Look for PHI uses of the extended result, we don't want to extend the
- // liveness of a PHI input. It breaks all kinds of assumptions down
- // stream. A PHI use is expected to be the kill of its source values.
- UI = MRI->use_nodbg_begin(DstReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
- UI != UE; ++UI)
- if (UI->isPHI())
- PHIBBs.insert(UI->getParent());
-
- const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
- for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
- MachineOperand *UseMO = Uses[i];
- MachineInstr *UseMI = UseMO->getParent();
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (PHIBBs.count(UseMBB))
- continue;
- unsigned NewVR = MRI->createVirtualRegister(RC);
- BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
- .addReg(DstReg, 0, SubIdx);
- UseMO->setReg(NewVR);
- ++NumReuse;
- Changed = true;
- }
- }
- }
-
- return Changed;
-}
-
-bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
- TM = &MF.getTarget();
- TII = TM->getInstrInfo();
- MRI = &MF.getRegInfo();
- DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
-
- bool Changed = false;
-
- SmallPtrSet<MachineInstr*, 8> LocalMIs;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
- LocalMIs.clear();
- for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
- ++MII) {
- MachineInstr *MI = &*MII;
- Changed |= OptimizeInstr(MI, MBB, LocalMIs);
- }
- }
-
- return Changed;
-}
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 1613fe21e42d..edb4eea71b8a 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -33,7 +33,7 @@ namespace {
public:
static char ID; // Pass identification
- OptimizePHIs() : MachineFunctionPass(&ID) {}
+ OptimizePHIs() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -54,8 +54,8 @@ namespace {
}
char OptimizePHIs::ID = 0;
-static RegisterPass<OptimizePHIs>
-X("opt-phis", "Optimize machine instruction PHIs");
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+ "Optimize machine instruction PHIs", false, false);
FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
@@ -101,16 +101,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
// Skip over register-to-register moves.
- unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx;
- if (SrcMI &&
- TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) &&
- SrcSubIdx == 0 && DstSubIdx == 0 &&
- TargetRegisterInfo::isVirtualRegister(MvSrcReg))
- SrcMI = MRI->getVRegDef(MvSrcReg);
- else if (SrcMI && SrcMI->isCopy() &&
- !SrcMI->getOperand(0).getSubReg() &&
- !SrcMI->getOperand(1).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
if (!SrcMI)
return false;
diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h
index 3bb24e1cc370..791c227f0d07 100644
--- a/lib/CodeGen/PBQP/HeuristicBase.h
+++ b/lib/CodeGen/PBQP/HeuristicBase.h
@@ -173,9 +173,13 @@ namespace PBQP {
bool finished = false;
while (!finished) {
- if (!optimalReduce())
- if (!impl().heuristicReduce())
+ if (!optimalReduce()) {
+ if (impl().heuristicReduce()) {
+ getSolver().recordRN();
+ } else {
finished = true;
+ }
+ }
}
}
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index 02938df00700..35514f967478 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -226,6 +226,8 @@ namespace PBQP {
// Nothing to do. Just push the node onto the reduction stack.
pushToStack(nItr);
+
+ s.recordR0();
}
/// \brief Apply rule R1.
@@ -274,6 +276,7 @@ namespace PBQP {
assert(nd.getSolverDegree() == 0 &&
"Degree 1 with edge removed should be 0.");
pushToStack(xnItr);
+ s.recordR1();
}
/// \brief Apply rule R2.
@@ -378,8 +381,14 @@ namespace PBQP {
removeSolverEdge(zxeItr);
pushToStack(xnItr);
+ s.recordR2();
}
+ /// \brief Record an application of the RN rule.
+ ///
+ /// For use by the HeuristicBase.
+ void recordRN() { s.recordRN(); }
+
private:
NodeData& getSolverNodeData(Graph::NodeItr nItr) {
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 4c1ce119ed05..18eaf7c0da9b 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -52,9 +52,7 @@ namespace PBQP {
bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
return true;
- if (s->getSolverDegree(n1Itr) < s->getSolverDegree(n2Itr))
- return false;
- return (&*n1Itr < &*n2Itr);
+ return false;
}
private:
HeuristicSolverImpl<Briggs> *s;
@@ -69,9 +67,7 @@ namespace PBQP {
cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr);
if (cost1 < cost2)
return true;
- if (cost1 > cost2)
- return false;
- return (&*n1Itr < &*n2Itr);
+ return false;
}
private:
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
index 294b5370afdf..047fd04c7cb8 100644
--- a/lib/CodeGen/PBQP/Solution.h
+++ b/lib/CodeGen/PBQP/Solution.h
@@ -26,15 +26,46 @@ namespace PBQP {
/// To get the selection for each node in the problem use the getSelection method.
class Solution {
private:
+
typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
SelectionsMap selections;
+ unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
+
public:
/// \brief Number of nodes for which selections have been made.
/// @return Number of nodes for which selections have been made.
unsigned numNodes() const { return selections.size(); }
+ /// \brief Records a reduction via the R0 rule. Should be called from the
+ /// solver only.
+ void recordR0() { ++r0Reductions; }
+
+ /// \brief Returns the number of R0 reductions applied to solve the problem.
+ unsigned numR0Reductions() const { return r0Reductions; }
+
+ /// \brief Records a reduction via the R1 rule. Should be called from the
+ /// solver only.
+ void recordR1() { ++r1Reductions; }
+
+ /// \brief Returns the number of R1 reductions applied to solve the problem.
+ unsigned numR1Reductions() const { return r1Reductions; }
+
+ /// \brief Records a reduction via the R2 rule. Should be called from the
+ /// solver only.
+ void recordR2() { ++r2Reductions; }
+
+ /// \brief Returns the number of R2 reductions applied to solve the problem.
+ unsigned numR2Reductions() const { return r2Reductions; }
+
+ /// \brief Records a reduction via the RN rule. Should be called from the
+ /// solver only.
+ void recordRN() { ++ rNReductions; }
+
+ /// \brief Returns the number of RN reductions applied to solve the problem.
+ unsigned numRNReductions() const { return rNReductions; }
+
/// \brief Set the selection for a given node.
/// @param nItr Node iterator.
/// @param selection Selection for nItr.
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index ea6b094d7efe..d4df4c548711 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Function.h"
@@ -37,16 +38,15 @@ STATISTIC(NumAtomic, "Number of atomic phis lowered");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
-static RegisterPass<PHIElimination>
-X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
+INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false);
-const PassInfo *const llvm::PHIEliminationID = &X;
+char &llvm::PHIEliminationID = PHIElimination::ID;
void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
- // rdar://7401784 This would be nice:
- // AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -56,9 +56,11 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
// Split critical edges to help the coalescer
- if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>())
+ if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= SplitPHIEdges(MF, *I, *LV);
+ Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+ }
// Populate VRegPHIUseCount
analyzePHINodes(MF);
@@ -179,6 +181,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
bool isDead = MPhi->getOperand(0).isDead();
// Create a new register for the incoming PHI arguments.
@@ -265,6 +268,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
for (int i = NumSrcs - 1; i >= 0; --i) {
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
@@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Insert the copy.
if (!reusedIncoming && IncomingReg)
BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg);
+ TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
@@ -378,10 +383,12 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
MachineBasicBlock &MBB,
- LiveVariables &LV) {
+ LiveVariables &LV,
+ MachineLoopInfo *MLI) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
+ bool Changed = false;
for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
@@ -390,8 +397,15 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
// We break edges when registers are live out from the predecessor block
// (not considering PHI nodes). If the register is live in to this block
// anyway, we would gain nothing from splitting.
- if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
- PreMBB->SplitCriticalEdge(&MBB, this);
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop which is very bad for code placement.
+ if (PreMBB != &MBB &&
+ !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+ if (!MLI ||
+ !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+ MLI->isLoopHeader(&MBB)))
+ Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0;
+ }
}
}
return true;
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 7dedf0318a8a..45a97182e71c 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -13,19 +13,21 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
class LiveVariables;
+ class MachineRegisterInfo;
+ class MachineLoopInfo;
/// Lower PHI instructions to copies.
class PHIElimination : public MachineFunctionPass {
- MachineRegisterInfo *MRI; // Machine register information
+ MachineRegisterInfo *MRI; // Machine register information
public:
static char ID; // Pass identification, replacement for typeid
- PHIElimination() : MachineFunctionPass(&ID) {}
+ PHIElimination() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -49,7 +51,7 @@ namespace llvm {
/// Split critical edges where necessary for good coalescer performance.
bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- LiveVariables &LV);
+ LiveVariables &LV, MachineLoopInfo *MLI);
/// SplitCriticalEdge - Split a critical edge from A to B by
/// inserting a new MBB. Update branches in A and PHI instructions
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 000000000000..17cee46ca16c
--- /dev/null
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,287 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the results.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction all ready sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumEliminated, "Number of compares eliminated");
+
+namespace {
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ PeepholeOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ };
+}
+
+char PeepholeOptimizer::ID = 0;
+INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false);
+
+FunctionPass *llvm::createPeepholeOptimizerPass() {
+ return new PeepholeOptimizer();
+}
+
+/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
+/// a single register and writes a single register and it does not modify the
+/// source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of the
+/// result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
+bool PeepholeOptimizer::
+OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ LocalMIs.insert(MI);
+
+ unsigned SrcReg, DstReg, SubIdx;
+ if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+ if (++UI == MRI->use_nodbg_end())
+ // No other uses.
+ return false;
+
+ // The source has other uses. See if we can replace the other uses with use of
+ // the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+ // Uses that are in the same BB of uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ UI = MRI->use_nodbg_begin(SrcReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend live range of
+ // the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result, we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions down
+ // stream. A PHI use is expected to be the kill of its source values.
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+ if (UI->isPHI())
+ PHIBBs.insert(UI->getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against all ready sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and use
+/// the flag from the previous instruction.
+bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ unsigned SrcReg;
+ int CmpValue;
+ if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ // Attempt to convert the defining instruction to set the "zero" flag.
+ if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
+ ++NumEliminated;
+ return true;
+ }
+
+ return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ LocalMIs.clear();
+
+ for (MachineBasicBlock::iterator
+ MII = I->begin(), ME = I->end(); MII != ME; ) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->getDesc().isCompare() &&
+ !MI->getDesc().hasUnmodeledSideEffects()) {
+ ++MII; // The iterator may become invalid if the compare is deleted.
+ Changed |= OptimizeCmpInstr(MI, MBB);
+ } else {
+ Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+ ++MII;
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 4af8e07f3480..f0bd6d1372be 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -85,7 +85,7 @@ namespace {
public:
static char ID;
PostRAScheduler(CodeGenOpt::Level ol) :
- MachineFunctionPass(&ID), OptLevel(ol) {}
+ MachineFunctionPass(ID), OptLevel(ol) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -130,7 +130,7 @@ namespace {
/// KillIndices - The index of the most recent kill (proceding bottom-up),
/// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> KillIndices;
public:
SchedulePostRATDList(MachineFunction &MF,
@@ -140,7 +140,8 @@ namespace {
AntiDepBreaker *ADB,
AliasAnalysis *aa)
: ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
- HazardRec(HR), AntiDepBreak(ADB), AA(aa) {}
+ HazardRec(HR), AntiDepBreak(ADB), AA(aa),
+ KillIndices(TRI->getNumRegs()) {}
~SchedulePostRATDList() {
}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index fb2f90935551..cd9d83eeb684 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -92,7 +92,7 @@ namespace {
public:
static char ID;
PreAllocSplitting()
- : MachineFunctionPass(&ID) {}
+ : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -203,10 +203,11 @@ namespace {
char PreAllocSplitting::ID = 0;
-static RegisterPass<PreAllocSplitting>
-X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
+INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting",
+ "Pre-Register Allocation Live Interval Splitting",
+ false, false);
-const PassInfo *const llvm::PreAllocSplittingID = &X;
+char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
/// findSpillPoint - Find a gap as far away from the given MI that's suitable
/// for spilling the current live interval. The index must be before any
@@ -676,11 +677,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
// If the def is a move, set the copy field.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (DstReg == LI->reg)
- NewVN->setCopy(&*DI);
- } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
+ if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
NewVN->setCopy(&*DI);
NewVNs[&*DI] = NewVN;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 2e31908f9fe2..b8831db1d118 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,8 +26,8 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
-static RegisterPass<ProcessImplicitDefs> X("processimpdefs",
- "Process Implicit Definitions.");
+INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions.", false, false);
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -46,12 +46,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
unsigned Reg, unsigned OpIdx,
const TargetInstrInfo *tii_,
SmallSet<unsigned, 8> &ImpDefRegs) {
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg &&
- (DstSubReg == 0 || ImpDefRegs.count(DstReg)))
- return true;
-
switch(OpIdx) {
case 1:
return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
@@ -75,14 +69,6 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
return true;
return false;
}
-
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
- if (Reg != SrcReg)
- return false;
- if (DstSubReg == 0 || ImpDefRegs.count(DstReg))
- return true;
- }
return false;
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 3843b2537051..e2802c1fdf4a 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -19,6 +19,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -32,7 +33,10 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
@@ -40,8 +44,11 @@ using namespace llvm;
char PEI::ID = 0;
-static RegisterPass<PEI>
-X("prologepilog", "Prologue/Epilogue Insertion");
+INITIALIZE_PASS(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false);
+
+STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
/// createPrologEpilogCodeInserter - This function returns a pass that inserts
/// prolog and epilog code, and eliminates abstract frame references.
@@ -56,7 +63,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
- FrameConstantRegMap.clear();
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
@@ -72,10 +78,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
calculateCalleeSavedRegisters(Fn);
// Determine placement of CSR spill/restore code:
- // - with shrink wrapping, place spills and restores to tightly
+ // - With shrink wrapping, place spills and restores to tightly
// enclose regions in the Machine CFG of the function where
- // they are used. Without shrink wrapping
- // - default (no shrink wrapping), place all spills in the
+ // they are used.
+ // - Without shink wrapping (default), place all spills in the
// entry block, all restores in return blocks.
placeCSRSpillsAndRestores(Fn);
@@ -461,8 +467,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
Offset = (Offset + Align - 1) / Align * Align;
if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
MFI->setObjectOffset(FrameIdx, Offset);
Offset += MFI->getObjectSize(FrameIdx);
}
@@ -547,15 +555,66 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
+ // FIXME: Once this is working, then enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
// Make sure that the stack protector comes before the local variables on the
// stack.
- if (MFI->getStackProtectorIndex() >= 0)
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
Offset, MaxAlign);
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
// Then assign frame offsets to stack objects that are not used to spill
// callee saved registers.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && (int)i == RS->getScavengingFrameIndex())
@@ -564,6 +623,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (MFI->getStackProtectorIndex() == (int)i)
continue;
+ if (LargeStackObjs.count(i))
+ continue;
AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
}
@@ -694,16 +755,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
- TargetRegisterInfo::FrameIndexValue Value;
- unsigned VReg =
- TRI.eliminateFrameIndex(MI, SPAdj, &Value,
+ TRI.eliminateFrameIndex(MI, SPAdj,
FrameIndexVirtualScavenging ? NULL : RS);
- if (VReg) {
- assert (FrameIndexVirtualScavenging &&
- "Not scavenging, but virtual returned from "
- "eliminateFrameIndex()!");
- FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj);
- }
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -731,38 +784,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
}
}
-/// findLastUseReg - find the killing use of the specified register within
-/// the instruciton range. Return the operand number of the kill in Operand.
-static MachineBasicBlock::iterator
-findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME,
- unsigned Reg) {
- // Scan forward to find the last use of this virtual register
- for (++I; I != ME; ++I) {
- MachineInstr *MI = I;
- bool isDefInsn = false;
- bool isKillInsn = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).isReg()) {
- unsigned OpReg = MI->getOperand(i).getReg();
- if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg))
- continue;
- assert (OpReg == Reg
- && "overlapping use of scavenged index register!");
- // If this is the killing use, we have a candidate.
- if (MI->getOperand(i).isKill())
- isKillInsn = true;
- else if (MI->getOperand(i).isDef())
- isDefInsn = true;
- }
- if (isKillInsn && !isDefInsn)
- return I;
- }
- // If we hit the end of the basic block, there was no kill of
- // the virtual register, which is wrong.
- assert (0 && "scavenged index register never killed!");
- return ME;
-}
-
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
@@ -772,27 +793,14 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
E = Fn.end(); BB != E; ++BB) {
RS->enterBasicBlock(BB);
- // FIXME: The logic flow in this function is still too convoluted.
- // It needs a cleanup refactoring. Do that in preparation for tracking
- // more than one scratch register value and using ranges to find
- // available scratch registers.
- unsigned CurrentVirtReg = 0;
- unsigned CurrentScratchReg = 0;
- bool havePrevValue = false;
- TargetRegisterInfo::FrameIndexValue PrevValue(0,0);
- TargetRegisterInfo::FrameIndexValue Value(0,0);
- MachineInstr *PrevLastUseMI = NULL;
- unsigned PrevLastUseOp = 0;
- bool trackingCurrentValue = false;
+ unsigned VirtReg = 0;
+ unsigned ScratchReg = 0;
int SPAdj = 0;
// The instruction stream may change in the loop, so check BB->end()
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
MachineInstr *MI = I;
- bool isDefInsn = false;
- bool isKillInsn = false;
- bool clobbersScratchReg = false;
bool DoIncr = true;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isReg()) {
@@ -800,121 +808,30 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
unsigned Reg = MO.getReg();
if (Reg == 0)
continue;
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- // If we have a previous scratch reg, check and see if anything
- // here kills whatever value is in there.
- if (Reg == CurrentScratchReg) {
- if (MO.isUse()) {
- // Two-address operands implicitly kill
- if (MO.isKill() || MI->isRegTiedToDefOperand(i))
- clobbersScratchReg = true;
- } else {
- assert (MO.isDef());
- clobbersScratchReg = true;
- }
- }
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- }
- // If this is a def, remember that this insn defines the value.
- // This lets us properly consider insns which re-use the scratch
- // register, such as r2 = sub r2, #imm, in the middle of the
- // scratch range.
- if (MO.isDef())
- isDefInsn = true;
+
+ ++NumVirtualFrameRegs;
// Have we already allocated a scratch register for this virtual?
- if (Reg != CurrentVirtReg) {
+ if (Reg != VirtReg) {
// When we first encounter a new virtual register, it
// must be a definition.
assert(MI->getOperand(i).isDef() &&
"frame index virtual missing def!");
- // We can't have nested virtual register live ranges because
- // there's only a guarantee of one scavenged register at a time.
- assert (CurrentVirtReg == 0 &&
- "overlapping frame index virtual registers!");
-
- // If the target gave us information about what's in the register,
- // we can use that to re-use scratch regs.
- DenseMap<unsigned, FrameConstantEntry>::iterator Entry =
- FrameConstantRegMap.find(Reg);
- trackingCurrentValue = Entry != FrameConstantRegMap.end();
- if (trackingCurrentValue) {
- SPAdj = (*Entry).second.second;
- Value = (*Entry).second.first;
- } else {
- SPAdj = 0;
- Value.first = 0;
- Value.second = 0;
- }
-
- // If the scratch register from the last allocation is still
- // available, see if the value matches. If it does, just re-use it.
- if (trackingCurrentValue && havePrevValue && PrevValue == Value) {
- // FIXME: This assumes that the instructions in the live range
- // for the virtual register are exclusively for the purpose
- // of populating the value in the register. That's reasonable
- // for these frame index registers, but it's still a very, very
- // strong assumption. rdar://7322732. Better would be to
- // explicitly check each instruction in the range for references
- // to the virtual register. Only delete those insns that
- // touch the virtual register.
-
- // Find the last use of the new virtual register. Remove all
- // instruction between here and there, and update the current
- // instruction to reference the last use insn instead.
- MachineBasicBlock::iterator LastUseMI =
- findLastUseReg(I, BB->end(), Reg);
-
- // Remove all instructions up 'til the last use, since they're
- // just calculating the value we already have.
- BB->erase(I, LastUseMI);
- I = LastUseMI;
-
- // Extend the live range of the scratch register
- PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false);
- RS->setUsed(CurrentScratchReg);
- CurrentVirtReg = Reg;
-
- // We deleted the instruction we were scanning the operands of.
- // Jump back to the instruction iterator loop. Don't increment
- // past this instruction since we updated the iterator already.
- DoIncr = false;
- break;
- }
-
// Scavenge a new scratch register
- CurrentVirtReg = Reg;
+ VirtReg = Reg;
const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
- PrevValue = Value;
+ ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
+ ++NumScavengedRegs;
}
// replace this reference to the virtual register with the
// scratch register.
- assert (CurrentScratchReg && "Missing scratch register!");
- MI->getOperand(i).setReg(CurrentScratchReg);
+ assert (ScratchReg && "Missing scratch register!");
+ MI->getOperand(i).setReg(ScratchReg);
- if (MI->getOperand(i).isKill()) {
- isKillInsn = true;
- PrevLastUseOp = i;
- PrevLastUseMI = MI;
- }
}
}
- // If this is the last use of the scratch, stop tracking it. The
- // last use will be a kill operand in an instruction that does
- // not also define the scratch register.
- if (isKillInsn && !isDefInsn) {
- CurrentVirtReg = 0;
- havePrevValue = trackingCurrentValue;
- }
- // Similarly, notice if instruction clobbered the value in the
- // register we're tracking for possible later reuse. This is noted
- // above, but enforced here since the value is still live while we
- // process the rest of the operands of the instruction.
- if (clobbersScratchReg) {
- havePrevValue = false;
- CurrentScratchReg = 0;
- }
if (DoIncr) {
RS->forward(I);
++I;
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index aa95773596cf..d575124a6b3e 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -36,7 +36,7 @@ namespace llvm {
class PEI : public MachineFunctionPass {
public:
static char ID;
- PEI() : MachineFunctionPass(&ID) {}
+ PEI() : MachineFunctionPass(ID) {}
const char *getPassName() const {
return "Prolog/Epilog Insertion & Frame Finalization";
@@ -99,13 +99,6 @@ namespace llvm {
// TRI->requiresFrameIndexScavenging() for the curren function.
bool FrameIndexVirtualScavenging;
- // When using the scavenger post-pass to resolve frame reference
- // materialization registers, maintain a map of the registers to
- // the constant value and SP adjustment associated with it.
- typedef std::pair<TargetRegisterInfo::FrameIndexValue, int>
- FrameConstantEntry;
- DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap;
-
#ifndef NDEBUG
// Machine function handle.
MachineFunction* MF;
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index f44478e5dd0b..fc150d55e226 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -16,6 +16,7 @@
#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -46,7 +47,7 @@ namespace {
class RAFast : public MachineFunctionPass {
public:
static char ID;
- RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1),
+ RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
isBulkSpilling(false) {}
private:
const TargetMachine *TM;
@@ -80,6 +81,8 @@ namespace {
// that is currently available in a physical register.
LiveRegMap LiveVirtRegs;
+ DenseMap<unsigned, MachineInstr *> LiveDbgValueMap;
+
// RegState - Track the state of a physical register.
enum RegState {
// A disabled register is not available for allocation, but an alias may
@@ -110,9 +113,9 @@ namespace {
// Allocatable - vector of allocatable physical registers.
BitVector Allocatable;
- // SkippedInstrs - Descriptors of instructions whose clobber list was ignored
- // because all registers were spilled. It is still necessary to mark all the
- // clobbered registers as used by the function.
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
// isBulkSpilling - This flag is set when LiveRegMap will be cleared
@@ -236,8 +239,7 @@ void RAFast::killVirtReg(unsigned VirtReg) {
}
/// spillVirtReg - This method spills the value specified by VirtReg into the
-/// corresponding stack slot if needed. If isKill is set, the register is also
-/// killed.
+/// corresponding stack slot if needed.
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
@@ -265,6 +267,31 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
++NumStores; // Update statistics
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // identify spilled location as the place to find corresponding variable's
+ // value.
+ if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) {
+ const MDNode *MDPtr =
+ DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
+ int64_t Offset = 0;
+ if (DBG->getOperand(1).isImm())
+ Offset = DBG->getOperand(1).getImm();
+ DebugLoc DL;
+ if (MI == MBB->end()) {
+ // If MI is at basic block end then use last instruction's location.
+ MachineBasicBlock::iterator EI = MI;
+ DL = (--EI)->getDebugLoc();
+ }
+ else
+ DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+ MachineBasicBlock *MBB = DBG->getParent();
+ MBB->insert(MI, NewDV);
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ LiveDbgValueMap[LRI->first] = NewDV;
+ }
+ }
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
}
@@ -471,7 +498,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
// First try to find a completely free register.
for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) &&
+ Allocatable.test(PhysReg))
return assignVirtToPhysReg(LRE, PhysReg);
}
@@ -480,6 +508,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
unsigned BestReg = 0, BestCost = spillImpossible;
for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+ if (!Allocatable.test(*I))
+ continue;
unsigned Cost = calcSpillCost(*I);
// Cost is 0 when all aliases are already disabled.
if (Cost == 0)
@@ -520,12 +550,9 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
// It's a copy, use the destination register as a hint.
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
- else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
- Hint = DstReg;
}
allocVirtReg(MI, *LRI, Hint);
} else if (LR.LastUse) {
@@ -712,7 +739,8 @@ void RAFast::AllocateBasicBlock() {
// Add live-in registers as live.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- definePhysReg(MII, *I, regReserved);
+ if (Allocatable.test(*I))
+ definePhysReg(MII, *I, regReserved);
SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
@@ -756,31 +784,43 @@ void RAFast::AllocateBasicBlock() {
// Debug values are not allowed to change codegen in any way.
if (MI->isDebugValue()) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
- if (LRI != LiveVirtRegs.end())
- setPhysReg(MI, i, LRI->second.PhysReg);
- else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS == -1)
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ bool ScanDbgValue = true;
+ while (ScanDbgValue) {
+ ScanDbgValue = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ LiveDbgValueMap[Reg] = MI;
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->second.PhysReg);
else {
- // Modify DBG_VALUE now that the value is in a spill slot.
- uint64_t Offset = MI->getOperand(1).getImm();
- const MDNode *MDPtr =
- MI->getOperand(MI->getNumOperands()-1).getMetadata();
- DebugLoc DL = MI->getDebugLoc();
- if (MachineInstr *NewDV =
- TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
- MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MBB->erase(MI), NewDV);
- } else
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1)
+ // We can't allocate a physreg for a DebugValue, sorry!
+ MO.setReg(0);
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ int64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" <<
+ "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ // Scan NewDV operands from the beginning.
+ MI = NewDV;
+ ScanDbgValue = true;
+ break;
+ } else
+ // We can't allocate a physreg for a DebugValue; sorry!
+ MO.setReg(0);
+ }
}
}
}
@@ -789,14 +829,13 @@ void RAFast::AllocateBasicBlock() {
}
// If this is a copy, we may be able to coalesce.
- unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub;
+ unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
if (MI->isCopy()) {
CopyDst = MI->getOperand(0).getReg();
CopySrc = MI->getOperand(1).getReg();
CopyDstSub = MI->getOperand(0).getSubReg();
CopySrcSub = MI->getOperand(1).getSubReg();
- } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
- CopySrc = CopyDst = 0;
+ }
// Track registers used by instruction.
UsedInInstr.reset();
@@ -843,13 +882,18 @@ void RAFast::AllocateBasicBlock() {
// operands. If there are also physical defs, these registers must avoid
// both physical defs and uses, making them more constrained than normal
// operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
// We didn't detect inline asm tied operands above, so just make this extra
// pass for all inline asm.
if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && hasPhysDefs)) {
+ (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) {
handleThroughOperands(MI, VirtDead);
// Don't attempt coalescing when we have funny stuff going on.
CopyDst = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
}
// Second scan.
@@ -870,14 +914,17 @@ void RAFast::AllocateBasicBlock() {
MRI->addPhysRegsUsed(UsedInInstr);
- // Track registers defined by instruction - early clobbers at this point.
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
UsedInInstr.reset();
if (hasEarlyClobbers) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef()) continue;
+ if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
UsedInInstr.set(Reg);
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
UsedInInstr.set(*AS);
@@ -887,9 +934,9 @@ void RAFast::AllocateBasicBlock() {
unsigned DefOpEnd = MI->getNumOperands();
if (TID.isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
- // exception is thrown, the landing pad is going to expect to find registers
- // in their spill slots, and 2. we don't have to wade through all the
- // <imp-def> operands on the call instruction.
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots, and 2. we don't have to wade through
+ // all the <imp-def> operands on the call instruction.
DefOpEnd = VirtOpEnd;
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);
@@ -992,6 +1039,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
SkippedInstrs.clear();
StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
return true;
}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 044672d6d7a5..5c62354a8872 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -87,10 +87,10 @@ namespace {
"to skip."),
cl::init(0),
cl::Hidden);
-
+
struct RALinScan : public MachineFunctionPass {
static char ID;
- RALinScan() : MachineFunctionPass(&ID) {
+ RALinScan() : MachineFunctionPass(ID) {
// Initialize the queue to record recently-used registers.
if (NumRecentlyUsedRegs > 0)
RecentRegs.resize(NumRecentlyUsedRegs, 0);
@@ -125,9 +125,10 @@ namespace {
const TargetRegisterInfo* tri_;
const TargetInstrInfo* tii_;
BitVector allocatableRegs_;
+ BitVector reservedRegs_;
LiveIntervals* li_;
LiveStacks* ls_;
- const MachineLoopInfo *loopInfo;
+ MachineLoopInfo *loopInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
/// start value. This is uses for backtracking.
@@ -255,9 +256,9 @@ namespace {
SmallVector<LiveInterval*, 8> &SpillIntervals);
/// attemptTrivialCoalescing - If a simple interval is defined by a copy,
- /// try allocate the definition the same register as the source register
- /// if the register is not defined during live time of the interval. This
- /// eliminate a copy. This is used to coalesce copies which were not
+ /// try to allocate the definition to the same register as the source,
+ /// if the register is not defined during the life time of the interval.
+ /// This eliminates a copy, and is used to coalesce copies which were not
/// coalesced away before allocation either due to dest and src being in
/// different register classes or because the coalescer was overly
/// conservative.
@@ -335,6 +336,17 @@ namespace {
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs);
+ /// getFirstNonReservedPhysReg - return the first non-reserved physical
+ /// register in the register class.
+ unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
+ TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_);
+ TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_);
+ while (i != aoe && reservedRegs_.test(*i))
+ ++i;
+ assert(i != aoe && "All registers reserved?!");
+ return *i;
+ }
+
void ComputeRelatedRegClasses();
template <typename ItTy>
@@ -358,8 +370,8 @@ namespace {
char RALinScan::ID = 0;
}
-static RegisterPass<RALinScan>
-X("linearscan-regalloc", "Linear Scan Register Allocator");
+INITIALIZE_PASS(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false);
void RALinScan::ComputeRelatedRegClasses() {
// First pass, add all reg classes to the union, and determine at least one
@@ -371,7 +383,7 @@ void RALinScan::ComputeRelatedRegClasses() {
for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
I != E; ++I) {
HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
-
+
const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
if (PRC) {
// Already processed this register. Just make sure we know that
@@ -382,7 +394,7 @@ void RALinScan::ComputeRelatedRegClasses() {
}
}
}
-
+
// Second pass, now that we know conservatively what register classes each reg
// belongs to, add info about aliases. We don't need to do this for targets
// without register aliases.
@@ -419,20 +431,15 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
unsigned CandReg;
{
MachineInstr *CopyMI;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (vni->def != SlotIndex() && vni->isDefAccurate() &&
- (CopyMI = li_->getInstructionFromIndex(vni->def)) &&
- (CopyMI->isCopy() ||
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)))
+ (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
// Defined by a copy, try to extend SrcReg forward
- CandReg = CopyMI->isCopy() ? CopyMI->getOperand(1).getReg() : SrcReg;
+ CandReg = CopyMI->getOperand(1).getReg();
else if (TrivCoalesceEnds &&
- (CopyMI =
- li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- cur.reg == SrcReg)
+ (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+ CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
// Only used by a copy, try to extend DstReg backwards
- CandReg = DstReg;
+ CandReg = CopyMI->getOperand(0).getReg();
else
return Reg;
}
@@ -469,6 +476,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
tri_ = tm_->getRegisterInfo();
tii_ = tm_->getInstrInfo();
allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
li_ = &getAnalysis<LiveIntervals>();
ls_ = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
@@ -487,9 +495,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
vrm_ = &getAnalysis<VirtRegMap>();
if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
-
- spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_));
-
+
+ spiller_.reset(createSpiller(*this, *mf_, *vrm_));
+
initIntervalSets();
linearScan();
@@ -543,7 +551,7 @@ void RALinScan::linearScan() {
// linear scan algorithm
DEBUG({
dbgs() << "********** LINEAR SCAN **********\n"
- << "********** Function: "
+ << "********** Function: "
<< mf_->getFunction()->getName() << '\n';
printIntervals("fixed", fixed_.begin(), fixed_.end());
});
@@ -765,7 +773,8 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
return IP.end();
}
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, SlotIndex Point){
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
+ SlotIndex Point){
for (unsigned i = 0, e = V.size(); i != e; ++i) {
RALinScan::IntervalPtr &IP = V[i];
LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
@@ -804,7 +813,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
static
float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
MachineRegisterInfo *mri_,
- const MachineLoopInfo *loopInfo) {
+ MachineLoopInfo *loopInfo) {
float Conflicts = 0;
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
E = mri_->reg_end(); I != E; ++I) {
@@ -837,7 +846,7 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur,
dbgs() << tri_->getName(Candidates[i].first) << " ";
dbgs() << "\n";
});
-
+
// Calculate the number of conflicts of each candidate.
for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
unsigned Reg = i->first->reg;
@@ -955,7 +964,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (cur->empty()) {
unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
- physReg = *RC->allocation_order_begin(*mf_);
+ physReg = getFirstNonReservedPhysReg(RC);
DEBUG(dbgs() << tri_->getName(physReg) << '\n');
// Note the register is not really in use.
vrm_->assignVirt2Phys(cur->reg, physReg);
@@ -978,27 +987,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if ((vni->def != SlotIndex()) && !vni->isUnused() &&
vni->isDefAccurate()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (CopyMI &&
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
- unsigned Reg = 0;
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
- Reg = SrcReg;
- else if (vrm_->isAssignedReg(SrcReg))
- Reg = vrm_->getPhys(SrcReg);
- if (Reg) {
- if (SrcSubReg)
- Reg = tri_->getSubReg(Reg, SrcSubReg);
- if (DstSubReg)
- Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
- if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- mri_->setRegAllocationHint(cur->reg, 0, Reg);
- }
- } else if (CopyMI && CopyMI->isCopy()) {
- DstReg = CopyMI->getOperand(0).getReg();
- DstSubReg = CopyMI->getOperand(0).getSubReg();
- SrcReg = CopyMI->getOperand(1).getReg();
- SrcSubReg = CopyMI->getOperand(1).getSubReg();
+ if (CopyMI && CopyMI->isCopy()) {
+ unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
+ unsigned SrcReg = CopyMI->getOperand(1).getReg();
+ unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
unsigned Reg = 0;
if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
Reg = SrcReg;
@@ -1024,7 +1016,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Can only allocate virtual registers!");
const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
- // If this is not in a related reg class to the register we're allocating,
+ // If this is not in a related reg class to the register we're allocating,
// don't check it.
if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
cur->overlapsFrom(*i->first, i->second-1)) {
@@ -1033,7 +1025,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
}
}
-
+
// Speculatively check to see if we can get a register right now. If not,
// we know we won't be able to by adding more constraints. If so, we can
// check to see if it is valid. Doing an exhaustive search of the fixed_ list
@@ -1048,7 +1040,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
SmallSet<unsigned, 8> RegAliases;
for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
RegAliases.insert(*AS);
-
+
bool ConflictsWithFixed = false;
for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
IntervalPtr &IP = fixed_[i];
@@ -1068,7 +1060,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
}
}
}
-
+
// Okay, the register picked by our speculative getFreePhysReg call turned
// out to be in use. Actually add all of the conflicting fixed registers to
// regUse_ so we can do an accurate query.
@@ -1080,7 +1072,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
LiveInterval *I = IP.first;
const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
I->endIndex() > StartPosition) {
LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
IP.second = II;
@@ -1099,11 +1091,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
physReg = getFreePhysReg(cur);
}
}
-
+
// Restore the physical register tracker, removing information about the
// future.
restoreRegUses();
-
+
// If we find a free register, we are done: assign this virtual to
// the free physical register and add this interval to the active
// list.
@@ -1118,7 +1110,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
UpgradeRegister(physReg);
if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
// "Downgrade" physReg to try to keep physReg from being allocated until
- // the next reload from the same SS is allocated.
+ // the next reload from the same SS is allocated.
mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
DowngradeRegister(cur, physReg);
}
@@ -1131,7 +1123,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
for (std::vector<std::pair<unsigned, float> >::iterator
I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
updateSpillWeights(SpillWeights, I->first, I->second, RC);
-
+
// for each interval in active, update spill weights.
for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
i != e; ++i) {
@@ -1141,7 +1133,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
reg = vrm_->getPhys(reg);
updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
}
-
+
DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
// Find a register to spill.
@@ -1155,17 +1147,22 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
e = RC->allocation_order_end(*mf_); i != e; ++i) {
unsigned reg = *i;
float regWeight = SpillWeights[reg];
- // Skip recently allocated registers.
+ // Don't even consider reserved regs.
+ if (reservedRegs_.test(reg))
+ continue;
+ // Skip recently allocated registers and reserved registers.
if (minWeight > regWeight && !isRecentlyUsed(reg))
Found = true;
RegsWeights.push_back(std::make_pair(reg, regWeight));
}
-
+
// If we didn't find a register that is spillable, try aliases?
if (!Found) {
for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
e = RC->allocation_order_end(*mf_); i != e; ++i) {
unsigned reg = *i;
+ if (reservedRegs_.test(reg))
+ continue;
// No need to worry about if the alias register size < regsize of RC.
// We are going to spill all registers that alias it anyway.
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
@@ -1179,7 +1176,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
minWeight = RegsWeights[0].second;
if (minWeight == HUGE_VALF) {
// All registers must have inf weight. Just grab one!
- minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_);
+ minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
if (cur->weight == HUGE_VALF ||
li_->getApproximateInstructionCount(*cur) == 0) {
// Spill a physical register around defs and uses.
@@ -1224,8 +1221,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// linearscan.
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
- SmallVector<LiveInterval*, 8> spillIs;
- std::vector<LiveInterval*> added;
+ SmallVector<LiveInterval*, 8> spillIs, added;
spiller_->spill(cur, added, spillIs);
std::sort(added.begin(), added.end(), LISorter());
@@ -1288,27 +1284,33 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// The earliest start of a Spilled interval indicates up to where
// in handled we need to roll back
- assert(!spillIs.empty() && "No spill intervals?");
+ assert(!spillIs.empty() && "No spill intervals?");
SlotIndex earliestStart = spillIs[0]->beginIndex();
-
+
// Spill live intervals of virtual regs mapped to the physical register we
// want to clear (and its aliases). We only spill those that overlap with the
// current interval as the rest do not affect its allocation. we also keep
// track of the earliest start of all spilled live intervals since this will
// mark our rollback point.
- std::vector<LiveInterval*> added;
+ SmallVector<LiveInterval*, 8> added;
while (!spillIs.empty()) {
LiveInterval *sli = spillIs.back();
spillIs.pop_back();
DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
-
- spiller_->spill(sli, added, spillIs, &earliestStart);
+ spiller_->spill(sli, added, spillIs);
addStackInterval(sli, ls_, li_, mri_, *vrm_);
spilled.insert(sli->reg);
}
+ // Include any added intervals in earliestStart.
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ SlotIndex SI = added[i]->beginIndex();
+ if (SI < earliestStart)
+ earliestStart = SI;
+ }
+
DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
// Scan handled in reverse order up to the earliest start of a
@@ -1431,6 +1433,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
// Skip recently allocated registers.
if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
FreeReg = Reg;
@@ -1459,6 +1464,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
FreeReg = Reg;
@@ -1479,17 +1487,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
SmallVector<unsigned, 256> inactiveCounts;
unsigned MaxInactiveCount = 0;
-
+
const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
+
for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
i != e; ++i) {
unsigned reg = i->first->reg;
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
- // If this is not in a related reg class to the register we're allocating,
+ // If this is not in a related reg class to the register we're allocating,
// don't check it.
const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
@@ -1506,7 +1514,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
unsigned Preference = vrm_->getRegAllocPref(cur->reg);
if (Preference) {
DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
- if (isRegAvail(Preference) &&
+ if (isRegAvail(Preference) &&
RC->contains(Preference))
return Preference;
}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 7e61a12a7eea..61f337bab49c 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,8 @@
#include "PBQP/HeuristicSolver.h"
#include "PBQP/Graph.h"
#include "PBQP/Heuristics/Briggs.h"
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
@@ -65,6 +67,11 @@ pbqpCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+ cl::desc("Pre-splite before PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
namespace {
///
@@ -77,7 +84,7 @@ namespace {
static char ID;
/// Construct a PBQP register allocator.
- PBQPRegAlloc() : MachineFunctionPass(&ID) {}
+ PBQPRegAlloc() : MachineFunctionPass(ID) {}
/// Return the pass name.
virtual const char* getPassName() const {
@@ -96,7 +103,10 @@ namespace {
au.addPreserved<LiveStacks>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
+ if (pbqpPreSplitting)
+ au.addRequired<LoopSplitter>();
au.addRequired<VirtRegMap>();
+ au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
}
@@ -104,7 +114,15 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
private:
- typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+
+ class LIOrdering {
+ public:
+ bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+ return li1->reg < li2->reg;
+ }
+ };
+
+ typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
typedef std::vector<const LiveInterval*> Node2LIMap;
typedef std::vector<unsigned> AllowedSet;
typedef std::vector<AllowedSet> AllowedSetMap;
@@ -112,7 +130,7 @@ namespace {
typedef std::pair<unsigned, unsigned> RegPair;
typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
- typedef std::set<LiveInterval*> LiveIntervalSet;
+ typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
@@ -122,6 +140,7 @@ namespace {
const TargetInstrInfo *tii;
const MachineLoopInfo *loopInfo;
MachineRegisterInfo *mri;
+ RenderMachineFunction *rmf;
LiveIntervals *lis;
LiveStacks *lss;
@@ -379,12 +398,14 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
iItr != iEnd; ++iItr) {
const MachineInstr *instr = &*iItr;
- unsigned srcReg, dstReg, srcSubReg, dstSubReg;
// If this isn't a copy then continue to the next instruction.
- if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+ if (!instr->isCopy())
continue;
+ unsigned srcReg = instr->getOperand(1).getReg();
+ unsigned dstReg = instr->getOperand(0).getReg();
+
// If the registers are already the same our job is nice and easy.
if (dstReg == srcReg)
continue;
@@ -567,6 +588,8 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
// Resize allowedSets container appropriately.
allowedSets.resize(vregIntervalsToAlloc.size());
+ BitVector ReservedRegs = tri->getReservedRegs(*mf);
+
// Iterate over virtual register intervals to compute allowed sets...
for (unsigned node = 0; node < node2LI.size(); ++node) {
@@ -575,8 +598,12 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
// Start by assuming all allocable registers in the class are allowed...
- RegVector liAllowed(liRC->allocation_order_begin(*mf),
- liRC->allocation_order_end(*mf));
+ RegVector liAllowed;
+ TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf);
+ TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf);
+ for (TargetRegisterClass::iterator it = aob; it != aoe; ++it)
+ if (!ReservedRegs.test(*it))
+ liAllowed.push_back(*it);
// Eliminate the physical registers which overlap with this range, along
// with all their aliases.
@@ -735,9 +762,11 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
const LiveInterval *spillInterval = node2LI[node];
double oldSpillWeight = spillInterval->weight;
SmallVector<LiveInterval*, 8> spillIs;
+ rmf->rememberUseDefs(spillInterval);
std::vector<LiveInterval*> newSpills =
lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
addStackInterval(spillInterval, mri);
+ rmf->rememberSpills(spillInterval, newSpills);
(void) oldSpillWeight;
DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
@@ -845,9 +874,11 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
+ rmf = &getAnalysis<RenderMachineFunction>();
vrm = &getAnalysis<VirtRegMap>();
+
DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
// Allocator main loop:
@@ -884,6 +915,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
// Finalise allocation, allocate empty ranges.
finalizeAlloc();
+ rmf->renderMachineFunction("After PBQP register allocation.", vrm);
+
vregIntervalsToAlloc.clear();
emptyVRegIntervals.clear();
li2Node.clear();
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index ab0bc2d78a60..02b5539f0f4f 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -54,9 +54,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm());
Src = MI->getOperand(2).getReg();
SrcSub = MI->getOperand(2).getSubReg();
- } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) {
+ } else
return false;
- }
return true;
}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 43b3fb642635..a2580b85bcc3 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -21,7 +21,9 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -226,19 +228,14 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
used = ~RegsAvailable & ~ReservedRegs;
}
-/// CreateRegClassMask - Set the bits that represent the registers in the
-/// TargetRegisterClass.
-static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
- ++I)
- Mask.set(*I);
-}
-
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (!isAliasUsed(*I))
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
return *I;
+ }
return 0;
}
@@ -325,11 +322,9 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
- // Mask off the registers which are not in the TargetRegisterClass.
- BitVector Candidates(NumPhysRegs, false);
- CreateRegClassMask(RC, Candidates);
- // Do not include reserved registers.
- Candidates ^= ReservedRegs & Candidates;
+ // Consider all allocatable registers in the register class initially
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
// Exclude all the registers being used by the instruction.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
@@ -349,8 +344,10 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
// If we found an unused register there is no reason to spill it.
- if (!isAliasUsed(SReg))
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
return SReg;
+ }
assert(ScavengedReg == 0 &&
"Scavenger slot is live, unable to scavenge another register!");
@@ -366,12 +363,12 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
"Cannot scavenge register without an emergency spill slot!");
TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
MachineBasicBlock::iterator II = prior(I);
- TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
// Restore the scavenged register before its use (or first terminator).
TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
II = prior(UseMI);
- TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
}
ScavengeRestore = prior(UseMI);
@@ -380,5 +377,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// ScavengedReg = SReg;
ScavengedRC = RC;
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
return SReg;
}
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
new file mode 100644
index 000000000000..93426eecbbc1
--- /dev/null
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -0,0 +1,1014 @@
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false);
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+ cl::desc("Appended to function name to get output file name "
+ "(default: \".html\")"),
+ cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+ cl::desc("Coma seperated list of functions to render"
+ ", or \"*\"."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+ cl::desc("Register classes to render pressure for."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+ cl::desc("Live intervals to show alongside code."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+ cl::desc("Don't display empty intervals."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+ cl::desc("Render indexes not associated with instructions or "
+ "MBB starts."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+useFancyVerticals("rmf-fancy-verts",
+ cl::desc("Use SVG for vertical text."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+prettyHTML("rmf-pretty-html",
+ cl::desc("Pretty print HTML. For debugging the renderer only.."),
+ cl::init(false), cl::Hidden);
+
+
+namespace llvm {
+
+ bool MFRenderingOptions::renderingOptionsProcessed;
+ std::set<std::string> MFRenderingOptions::mfNamesToRender;
+ bool MFRenderingOptions::renderAllMFs = false;
+
+ std::set<std::string> MFRenderingOptions::classNamesToRender;
+ bool MFRenderingOptions::renderAllClasses = false;
+
+ std::set<std::pair<unsigned, unsigned> >
+ MFRenderingOptions::intervalNumsToRender;
+ unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
+
+ template <typename OutputItr>
+ void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
+ OutputItr outItr) {
+ std::string::const_iterator curPos = s.begin();
+ std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
+ while (nextComa != s.end()) {
+ std::string elem;
+ std::copy(curPos, nextComa, std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ curPos = llvm::next(nextComa);
+ nextComa = std::find(curPos, s.end(), ',');
+ }
+
+ if (curPos != s.end()) {
+ std::string elem;
+ std::copy(curPos, s.end(), std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ }
+ }
+
+ void MFRenderingOptions::processOptions() {
+ if (!renderingOptionsProcessed) {
+ processFuncNames();
+ processRegClassNames();
+ processIntervalNumbers();
+ renderingOptionsProcessed = true;
+ }
+ }
+
+ void MFRenderingOptions::processFuncNames() {
+ if (machineFuncsToRender == "*") {
+ renderAllMFs = true;
+ } else {
+ splitComaSeperatedList(machineFuncsToRender,
+ std::inserter(mfNamesToRender,
+ mfNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processRegClassNames() {
+ if (pressureClasses == "*") {
+ renderAllClasses = true;
+ } else {
+ splitComaSeperatedList(pressureClasses,
+ std::inserter(classNamesToRender,
+ classNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processIntervalNumbers() {
+ std::set<std::string> intervalRanges;
+ splitComaSeperatedList(showIntervals,
+ std::inserter(intervalRanges,
+ intervalRanges.begin()));
+ std::for_each(intervalRanges.begin(), intervalRanges.end(),
+ processIntervalRange);
+ }
+
+ void MFRenderingOptions::processIntervalRange(
+ const std::string &intervalRangeStr) {
+ if (intervalRangeStr == "*") {
+ intervalTypesToRender |= All;
+ } else if (intervalRangeStr == "virt-nospills*") {
+ intervalTypesToRender |= VirtNoSpills;
+ } else if (intervalRangeStr == "spills*") {
+ intervalTypesToRender |= VirtSpills;
+ } else if (intervalRangeStr == "virt*") {
+ intervalTypesToRender |= AllVirt;
+ } else if (intervalRangeStr == "phys*") {
+ intervalTypesToRender |= AllPhys;
+ } else {
+ std::istringstream iss(intervalRangeStr);
+ unsigned reg1, reg2;
+ if ((iss >> reg1 >> std::ws)) {
+ if (iss.eof()) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
+ } else {
+ char c;
+ iss >> c;
+ if (c == '-' && (iss >> reg2)) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
+ } else {
+ dbgs() << "Warning: Invalid interval range \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ } else {
+ dbgs() << "Warning: Invalid interval number \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ }
+
+ void MFRenderingOptions::setup(MachineFunction *mf,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis,
+ const RenderMachineFunction *rmf) {
+ this->mf = mf;
+ this->tri = tri;
+ this->lis = lis;
+ this->rmf = rmf;
+
+ clear();
+ }
+
+ void MFRenderingOptions::clear() {
+ regClassesTranslatedToCurrentFunction = false;
+ regClassSet.clear();
+
+ intervalsTranslatedToCurrentFunction = false;
+ intervalSet.clear();
+ }
+
+ void MFRenderingOptions::resetRenderSpecificOptions() {
+ intervalSet.clear();
+ intervalsTranslatedToCurrentFunction = false;
+ }
+
+ bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
+ processOptions();
+
+ return (renderAllMFs ||
+ mfNamesToRender.find(mf->getFunction()->getName()) !=
+ mfNamesToRender.end());
+ }
+
+ const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
+ translateRegClassNamesToCurrentFunction();
+ return regClassSet;
+ }
+
+ const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
+ translateIntervalNumbersToCurrentFunction();
+ return intervalSet;
+ }
+
+ bool MFRenderingOptions::renderEmptyIndexes() const {
+ return showEmptyIndexes;
+ }
+
+ bool MFRenderingOptions::fancyVerticals() const {
+ return useFancyVerticals;
+ }
+
+ void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
+ if (!regClassesTranslatedToCurrentFunction) {
+ processOptions();
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ if (renderAllClasses ||
+ classNamesToRender.find(trc->getName()) !=
+ classNamesToRender.end()) {
+ regClassSet.insert(trc);
+ }
+ }
+ regClassesTranslatedToCurrentFunction = true;
+ }
+ }
+
+ void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
+ if (!intervalsTranslatedToCurrentFunction) {
+ processOptions();
+
+ // If we're not just doing explicit then do a copy over all matching
+ // types.
+ if (intervalTypesToRender != ExplicitOnly) {
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (filterEmpty && li->empty())
+ continue;
+
+ if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
+ (intervalTypesToRender & AllPhys))) {
+ intervalSet.insert(li);
+ } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
+ if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
+ ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
+ intervalSet.insert(li);
+ }
+ }
+ }
+ }
+
+ // If we need to process the explicit list...
+ if (intervalTypesToRender != All) {
+ for (std::set<std::pair<unsigned, unsigned> >::const_iterator
+ regRangeItr = intervalNumsToRender.begin(),
+ regRangeEnd = intervalNumsToRender.end();
+ regRangeItr != regRangeEnd; ++regRangeItr) {
+ const std::pair<unsigned, unsigned> &range = *regRangeItr;
+ for (unsigned reg = range.first; reg != range.second; ++reg) {
+ if (lis->hasInterval(reg)) {
+ intervalSet.insert(&lis->getInterval(reg));
+ }
+ }
+ }
+ }
+
+ intervalsTranslatedToCurrentFunction = true;
+ }
+ }
+
+ // ---------- TargetRegisterExtraInformation implementation ----------
+
+ TargetRegisterExtraInfo::TargetRegisterExtraInfo()
+ : mapsPopulated(false) {
+ }
+
+ void TargetRegisterExtraInfo::setup(MachineFunction *mf,
+ MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis) {
+ this->mf = mf;
+ this->mri = mri;
+ this->tri = tri;
+ this->lis = lis;
+ }
+
+ void TargetRegisterExtraInfo::reset() {
+ if (!mapsPopulated) {
+ initWorst();
+ //initBounds();
+ initCapacity();
+ mapsPopulated = true;
+ }
+
+ resetPressureAndLiveStates();
+ }
+
+ void TargetRegisterExtraInfo::clear() {
+ prWorst.clear();
+ vrWorst.clear();
+ capacityMap.clear();
+ pressureMap.clear();
+ //liveStatesMap.clear();
+ mapsPopulated = false;
+ }
+
+ void TargetRegisterExtraInfo::initWorst() {
+ assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() &&
+ "Worst map already initialised?");
+
+ // Start with the physical registers.
+ for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) {
+ WorstMapLine &pregLine = prWorst[preg];
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ unsigned numOverlaps = 0;
+ for (TargetRegisterClass::iterator rItr = trc->begin(),
+ rEnd = trc->end();
+ rItr != rEnd; ++rItr) {
+ unsigned trcPReg = *rItr;
+ if (tri->regsOverlap(preg, trcPReg))
+ ++numOverlaps;
+ }
+
+ pregLine[trc] = numOverlaps;
+ }
+ }
+
+ // Now the register classes.
+ for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rc1Itr != rcEnd; ++rc1Itr) {
+ const TargetRegisterClass *trc1 = *rc1Itr;
+ WorstMapLine &classLine = vrWorst[trc1];
+
+ for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
+ rc2Itr != rcEnd; ++rc2Itr) {
+ const TargetRegisterClass *trc2 = *rc2Itr;
+
+ unsigned worst = 0;
+
+ for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
+ trc1End = trc1->end();
+ trc1Itr != trc1End; ++trc1Itr) {
+ unsigned trc1Reg = *trc1Itr;
+ unsigned trc1RegWorst = 0;
+
+ for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
+ trc2End = trc2->end();
+ trc2Itr != trc2End; ++trc2Itr) {
+ unsigned trc2Reg = *trc2Itr;
+ if (tri->regsOverlap(trc1Reg, trc2Reg))
+ ++trc1RegWorst;
+ }
+ if (trc1RegWorst > worst) {
+ worst = trc1RegWorst;
+ }
+ }
+
+ if (worst != 0) {
+ classLine[trc2] = worst;
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getWorst(
+ unsigned reg,
+ const TargetRegisterClass *trc) const {
+ const WorstMapLine *wml = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ PRWorstMap::const_iterator prwItr = prWorst.find(reg);
+ assert(prwItr != prWorst.end() && "Missing prWorst entry.");
+ wml = &prwItr->second;
+ } else {
+ const TargetRegisterClass *regTRC = mri->getRegClass(reg);
+ VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
+ assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
+ wml = &vrwItr->second;
+ }
+
+ WorstMapLine::const_iterator wmlItr = wml->find(trc);
+ if (wmlItr == wml->end())
+ return 0;
+
+ return wmlItr->second;
+ }
+
+ void TargetRegisterExtraInfo::initCapacity() {
+ assert(!mapsPopulated && capacityMap.empty() &&
+ "Capacity map already initialised?");
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ unsigned capacity = std::distance(trc->allocation_order_begin(*mf),
+ trc->allocation_order_end(*mf));
+
+ if (capacity != 0)
+ capacityMap[trc] = capacity;
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getCapacity(
+ const TargetRegisterClass *trc) const {
+ CapacityMap::const_iterator cmItr = capacityMap.find(trc);
+ assert(cmItr != capacityMap.end() &&
+ "vreg with unallocable register class");
+ return cmItr->second;
+ }
+
+ void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
+ pressureMap.clear();
+ //liveStatesMap.clear();
+
+ // Iterate over all slots.
+
+
+ // Iterate over all live intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ const TargetRegisterClass *liTRC;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg))
+ continue;
+
+ liTRC = mri->getRegClass(li->reg);
+
+
+ // For all ranges in the current interal.
+ for (LiveInterval::iterator lrItr = li->begin(),
+ lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+ LiveRange *lr = &*lrItr;
+
+ // For all slots in the current range.
+ for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
+
+ // Record increased pressure at index for all overlapping classes.
+ for (TargetRegisterInfo::regclass_iterator
+ rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ if (trc->allocation_order_begin(*mf) ==
+ trc->allocation_order_end(*mf))
+ continue;
+
+ unsigned worstAtI = getWorst(li->reg, trc);
+
+ if (worstAtI != 0) {
+ pressureMap[i][trc] += worstAtI;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ PressureMap::const_iterator pmItr = pressureMap.find(i);
+ if (pmItr == pressureMap.end())
+ return 0;
+ const PressureMapLine &pmLine = pmItr->second;
+ PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+ if (pmlItr == pmLine.end())
+ return 0;
+ return pmlItr->second;
+ }
+
+ bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ return (getPressureAtSlot(trc, i) > getCapacity(trc));
+ }
+
+ // ---------- MachineFunctionRenderer implementation ----------
+
+ void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+ if (!prettyHTML)
+ return;
+ for (unsigned i = 0; i < ns; ++i) {
+ os << " ";
+ }
+ }
+
+ RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+ return Spacer(ns);
+ }
+
+ raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) {
+ s.print(os);
+ return os;
+ }
+
+ template <typename Iterator>
+ std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const {
+ std::string r;
+
+ for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+ char c = *sItr;
+
+ switch (c) {
+ case '<': r.append("&lt;"); break;
+ case '>': r.append("&gt;"); break;
+ case '&': r.append("&amp;"); break;
+ case ' ': r.append("&nbsp;"); break;
+ case '\"': r.append("&quot;"); break;
+ default: r.push_back(c); break;
+ }
+ }
+
+ return r;
+ }
+
+ RenderMachineFunction::LiveState
+ RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+ SlotIndex i) const {
+ const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+ // For uses/defs recorded use/def indexes override current liveness and
+ // instruction operands (Only for the interval which records the indexes).
+ if (i.isUse() || i.isDef()) {
+ UseDefs::const_iterator udItr = useDefs.find(li);
+ if (udItr != useDefs.end()) {
+ const SlotSet &slotSet = udItr->second;
+ if (slotSet.count(i)) {
+ if (i.isUse()) {
+ return Used;
+ }
+ // else
+ return Defined;
+ }
+ }
+ }
+
+ // If the slot is a load/store, or there's no info in the use/def set then
+ // use liveness and instruction operand info.
+ if (li->liveAt(i)) {
+
+ if (mi == 0) {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ } else {
+ if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ return Defined;
+ } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ return Used;
+ } else {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ }
+ }
+ }
+ return Dead;
+ }
+
+ RenderMachineFunction::PressureState
+ RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ if (trei.getPressureAtSlot(trc, i) == 0) {
+ return Zero;
+ } else if (trei.classOverCapacityAtSlot(trc, i)){
+ return High;
+ }
+ return Low;
+ }
+
+ /// \brief Render a machine instruction.
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ oss << *mi;
+
+ os << escapeChars(oss.str());
+ }
+
+ template <typename T>
+ void RenderMachineFunction::renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const {
+ if (ro.fancyVerticals()) {
+ os << indent << "<object\n"
+ << indent + s(2) << "class=\"obj\"\n"
+ << indent + s(2) << "type=\"image/svg+xml\"\n"
+ << indent + s(2) << "width=\"14px\"\n"
+ << indent + s(2) << "height=\"55px\"\n"
+ << indent + s(2) << "data=\"data:image/svg+xml,\n"
+ << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n"
+ << indent + s(6) << "<text x='-55' y='10' "
+ "font-family='Courier' font-size='12' "
+ "transform='rotate(-90)' "
+ "text-rendering='optimizeSpeed' "
+ "fill='#000'>" << t << "</text>\n"
+ << indent + s(4) << "</svg>\">\n"
+ << indent << "</object>\n";
+ } else {
+ std::ostringstream oss;
+ oss << t;
+ std::string tStr(oss.str());
+
+ os << indent;
+ for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end();
+ tStrItr != tStrEnd; ++tStrItr) {
+ os << *tStrItr << "<br/>";
+ }
+ os << "\n";
+ }
+ }
+
+ void RenderMachineFunction::insertCSS(const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<style type=\"text/css\">\n"
+ << indent + s(2) << "body { font-color: black; }\n"
+ << indent + s(2) << "table.code td { font-family: monospace; "
+ "border-width: 0px; border-style: solid; "
+ "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n"
+ << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n"
+ << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n"
+ << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n"
+ << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n"
+ << indent + s(2) << "table.code th { border-width: 0px; "
+ "border-style: solid; }\n"
+ << indent << "</style>\n";
+ }
+
+ void RenderMachineFunction::renderFunctionSummary(
+ const Spacer &indent, raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << indent << "<h1>Function: " << mf->getFunction()->getName()
+ << "</h1>\n"
+ << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
+ }
+
+
+ void RenderMachineFunction::renderPressureTableLegend(
+ const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<h2>Rendering Pressure Legend:</h2>\n"
+ << indent << "<table class=\"code\">\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<th>Pressure</th><th>Description</th>"
+ "<th>Appearance</th>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>No Pressure</td>"
+ "<td>No physical registers of this class requested.</td>"
+ "<td class=\"p-z\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>Low Pressure</td>"
+ "<td>Sufficient physical registers to meet demand.</td>"
+ "<td class=\"p-l\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>High Pressure</td>"
+ "<td>Potentially insufficient physical registers to meet demand.</td>"
+ "<td class=\"p-h\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent << "</table>\n";
+ }
+
+ template <typename CellType>
+ void RenderMachineFunction::renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const {
+
+ if (rleAccumulator.second == 0)
+ return;
+
+ typename std::map<CellType, std::string>::const_iterator ctsItr =
+ cellTypeStrs.find(rleAccumulator.first);
+
+ assert(ctsItr != cellTypeStrs.end() && "No string for given cell type.");
+
+ os << indent + s(4) << "<td class=\"" << ctsItr->second << "\"";
+ if (rleAccumulator.second > 1)
+ os << " colspan=" << rleAccumulator.second;
+ os << "></td>\n";
+ }
+
+
+ void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const {
+
+ std::map<LiveState, std::string> lsStrs;
+ lsStrs[Dead] = "l-n";
+ lsStrs[Defined] = "l-d";
+ lsStrs[Used] = "l-u";
+ lsStrs[AliveReg] = "l-r";
+ lsStrs[AliveStack] = "l-s";
+
+ std::map<PressureState, std::string> psStrs;
+ psStrs[Zero] = "p-z";
+ psStrs[Low] = "p-l";
+ psStrs[High] = "p-h";
+
+ // Open the table...
+
+ os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n"
+ << indent + s(2) << "<tr>\n";
+
+ // Render the header row...
+
+ os << indent + s(4) << "<th>index</th>\n"
+ << indent + s(4) << "<th>instr</th>\n";
+
+ // Render class names if necessary...
+ if (!ro.regClasses().empty()) {
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, trc->getName());
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<th>&nbsp;&nbsp;</th>\n";
+
+ // Render interval numbers if necessary...
+ if (!ro.intervals().empty()) {
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = *liItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, li->reg);
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ os << indent + s(2) << "</tr>\n";
+
+ // End header row, start with the data rows...
+
+ MachineInstr *mi = 0;
+
+ // Data rows:
+ for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex();
+ i = i.getNextSlot()) {
+
+ // Render the slot column.
+ os << indent + s(2) << "<tr height=6ex>\n";
+
+ // Render the code column.
+ if (i.isLoad()) {
+ MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
+ mi = sis->getInstructionFromIndex(i);
+
+ if (i == sis->getMBBStartIdx(mbb) || mi != 0 ||
+ ro.renderEmptyIndexes()) {
+ os << indent + s(4) << "<td rowspan=4>" << i << "&nbsp;</td>\n"
+ << indent + s(4) << "<td rowspan=4>\n";
+
+ if (i == sis->getMBBStartIdx(mbb)) {
+ os << indent + s(6) << "BB#" << mbb->getNumber() << ":&nbsp;\n";
+ } else if (mi != 0) {
+ os << indent + s(6) << "&nbsp;&nbsp;";
+ renderMachineInstr(os, mi);
+ } else {
+ // Empty interval - leave blank.
+ }
+ os << indent + s(4) << "</td>\n";
+ } else {
+ i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ continue;
+ }
+ }
+
+ // Render the class columns.
+ if (!ro.regClasses().empty()) {
+ std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ PressureState newPressure = getPressureStateAt(trc, i);
+
+ if (newPressure == psRLEAccumulator.first) {
+ ++psRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ psRLEAccumulator.first = newPressure;
+ psRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<td width=2em></td>\n";
+
+ if (!ro.intervals().empty()) {
+ std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+ const LiveInterval *li = *liItr;
+ LiveState newLiveness = getLiveStateAt(li, i);
+
+ if (newLiveness == lsRLEAccumulator.first) {
+ ++lsRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ lsRLEAccumulator.first = newLiveness;
+ lsRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ }
+ os << indent + s(2) << "</tr>\n";
+ }
+
+ os << indent << "</table>\n";
+
+ if (!ro.regClasses().empty())
+ renderPressureTableLegend(indent, os);
+ }
+
+ void RenderMachineFunction::renderFunctionPage(
+ raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << "<html>\n"
+ << s(2) << "<head>\n"
+ << s(4) << "<title>" << fqn << "</title>\n";
+
+ insertCSS(s(4), os);
+
+ os << s(2) << "<head>\n"
+ << s(2) << "<body >\n";
+
+ renderFunctionSummary(s(4), os, renderContextStr);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ //renderLiveIntervalInfoTable(" ", os);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ renderCodeTablePlusPI(s(4), os);
+
+ os << s(2) << "</body>\n"
+ << "</html>\n";
+ }
+
+ void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ lis = &getAnalysis<LiveIntervals>();
+ sis = &getAnalysis<SlotIndexes>();
+
+ trei.setup(mf, mri, tri, lis);
+ ro.setup(mf, tri, lis, this);
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ return false;
+ }
+
+ void RenderMachineFunction::releaseMemory() {
+ trei.clear();
+ ro.clear();
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+ }
+
+ void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ const MachineInstr *mi = &*rItr;
+ if (mi->readsRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ }
+ if (mi->definesRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ }
+ }
+ }
+
+ void RenderMachineFunction::rememberSpills(
+ const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(),
+ siEnd = spills.end();
+ siItr != siEnd; ++siItr) {
+ const LiveInterval *spill = *siItr;
+ spillIntervals[li].insert(spill);
+ spillFor[spill] = li;
+ }
+ }
+
+ bool RenderMachineFunction::isSpill(const LiveInterval *li) const {
+ SpillForMap::const_iterator sfItr = spillFor.find(li);
+ if (sfItr == spillFor.end())
+ return false;
+ return true;
+ }
+
+ void RenderMachineFunction::renderMachineFunction(
+ const char *renderContextStr,
+ const VirtRegMap *vrm,
+ const char *renderSuffix) {
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ this->vrm = vrm;
+ trei.reset();
+
+ std::string rpFileName(mf->getFunction()->getName().str() +
+ (renderSuffix ? renderSuffix : "") +
+ outputFileSuffix);
+
+ std::string errMsg;
+ raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary);
+
+ renderFunctionPage(outFile, renderContextStr);
+
+ ro.resetRenderSpecificOptions();
+ }
+
+ std::string RenderMachineFunction::escapeChars(const std::string &s) const {
+ return escapeChars(s.begin(), s.end());
+ }
+
+}
diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h
new file mode 100644
index 000000000000..8d56a8292ac5
--- /dev/null
+++ b/lib/CodeGen/RenderMachineFunction.h
@@ -0,0 +1,336 @@
+//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineRegisterInfo;
+ class RenderMachineFunction;
+ class TargetRegisterClass;
+ class TargetRegisterInfo;
+ class VirtRegMap;
+ class raw_ostream;
+
+ /// \brief Helper class to process rendering options. Tries to be as lazy as
+ /// possible.
+ class MFRenderingOptions {
+ public:
+
+ struct RegClassComp {
+ bool operator()(const TargetRegisterClass *trc1,
+ const TargetRegisterClass *trc2) const {
+ std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
+ return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
+ trc2Name.begin(), trc2Name.end());
+ }
+ };
+
+ typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
+
+ struct IntervalComp {
+ bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+ return li1->reg < li2->reg;
+ }
+ };
+
+ typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
+
+ /// Initialise the rendering options.
+ void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
+ LiveIntervals *lis, const RenderMachineFunction *rmf);
+
+ /// Clear translations of options to the current function.
+ void clear();
+
+ /// Reset any options computed for this specific rendering.
+ void resetRenderSpecificOptions();
+
+ /// Should we render the current function.
+ bool shouldRenderCurrentMachineFunction() const;
+
+ /// Return the set of register classes to render pressure for.
+ const RegClassSet& regClasses() const;
+
+ /// Return the set of live intervals to render liveness for.
+ const IntervalSet& intervals() const;
+
+ /// Render indexes which are not associated with instructions / MBB starts.
+ bool renderEmptyIndexes() const;
+
+ /// Return whether or not to render using SVG for fancy vertical text.
+ bool fancyVerticals() const;
+
+ private:
+
+ static bool renderingOptionsProcessed;
+ static std::set<std::string> mfNamesToRender;
+ static bool renderAllMFs;
+
+ static std::set<std::string> classNamesToRender;
+ static bool renderAllClasses;
+
+
+ static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
+ typedef enum { ExplicitOnly = 0,
+ AllPhys = 1,
+ VirtNoSpills = 2,
+ VirtSpills = 4,
+ AllVirt = 6,
+ All = 7 }
+ IntervalTypesToRender;
+ static unsigned intervalTypesToRender;
+
+ template <typename OutputItr>
+ static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
+
+ static void processOptions();
+
+ static void processFuncNames();
+ static void processRegClassNames();
+ static void processIntervalNumbers();
+
+ static void processIntervalRange(const std::string &intervalRangeStr);
+
+ MachineFunction *mf;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ const RenderMachineFunction *rmf;
+
+ mutable bool regClassesTranslatedToCurrentFunction;
+ mutable RegClassSet regClassSet;
+
+ mutable bool intervalsTranslatedToCurrentFunction;
+ mutable IntervalSet intervalSet;
+
+ void translateRegClassNamesToCurrentFunction() const;
+
+ void translateIntervalNumbersToCurrentFunction() const;
+ };
+
+ /// \brief Provide extra information about the physical and virtual registers
+ /// in the function being compiled.
+ class TargetRegisterExtraInfo {
+ public:
+ TargetRegisterExtraInfo();
+
+ /// \brief Set up TargetRegisterExtraInfo with pointers to necessary
+ /// sources of information.
+ void setup(MachineFunction *mf, MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri, LiveIntervals *lis);
+
+ /// \brief Recompute tables for changed function.
+ void reset();
+
+ /// \brief Free all tables in TargetRegisterExtraInfo.
+ void clear();
+
+ /// \brief Maximum number of registers from trc which alias reg.
+ unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const;
+
+ /// \brief Returns the number of allocable registers in trc.
+ unsigned getCapacity(const TargetRegisterClass *trc) const;
+
+ /// \brief Return the number of registers of class trc that may be
+ /// needed at slot i.
+ unsigned getPressureAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ /// \brief Return true if the number of registers of type trc that may be
+ /// needed at slot i is greater than the capacity of trc.
+ bool classOverCapacityAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ private:
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine;
+ typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap;
+ VRWorstMap vrWorst;
+
+ typedef std::map<unsigned, WorstMapLine> PRWorstMap;
+ PRWorstMap prWorst;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap;
+ CapacityMap capacityMap;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine;
+ typedef std::map<SlotIndex, PressureMapLine> PressureMap;
+ PressureMap pressureMap;
+
+ bool mapsPopulated;
+
+ /// \brief Initialise the 'worst' table.
+ void initWorst();
+
+ /// \brief Initialise the 'capacity' table.
+ void initCapacity();
+
+ /// \brief Initialise/Reset the 'pressure' and live states tables.
+ void resetPressureAndLiveStates();
+ };
+
+ /// \brief Render MachineFunction objects and related information to a HTML
+ /// page.
+ class RenderMachineFunction : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ RenderMachineFunction() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+ void rememberUseDefs(const LiveInterval *li);
+
+ void rememberSpills(const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills);
+
+ bool isSpill(const LiveInterval *li) const;
+
+ /// \brief Render this machine function to HTML.
+ ///
+ /// @param renderContextStr This parameter will be included in the top of
+ /// the html file to explain where (in the
+ /// codegen pipeline) this function was rendered
+ /// from. Set it to something like
+ /// "Pre-register-allocation".
+ /// @param vrm If non-null the VRM will be queried to determine
+ /// whether a virtual register was allocated to a
+ /// physical register or spilled.
+ /// @param renderFilePrefix This string will be appended to the function
+ /// name (before the output file suffix) to enable
+ /// multiple renderings from the same function.
+ void renderMachineFunction(const char *renderContextStr,
+ const VirtRegMap *vrm = 0,
+ const char *renderSuffix = 0);
+
+ private:
+ class Spacer;
+ friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s);
+
+ std::string fqn;
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ SlotIndexes *sis;
+ const VirtRegMap *vrm;
+
+ TargetRegisterExtraInfo trei;
+ MFRenderingOptions ro;
+
+
+
+ // Utilities.
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState;
+ LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const;
+
+ typedef enum { Zero, Low, High } PressureState;
+ PressureState getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ typedef std::map<const LiveInterval*, std::set<const LiveInterval*> >
+ SpillIntervals;
+ SpillIntervals spillIntervals;
+
+ typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap;
+ SpillForMap spillFor;
+
+ typedef std::set<SlotIndex> SlotSet;
+ typedef std::map<const LiveInterval*, SlotSet> UseDefs;
+ UseDefs useDefs;
+
+ // ---------- Rendering methods ----------
+
+ /// For inserting spaces when pretty printing.
+ class Spacer {
+ public:
+ explicit Spacer(unsigned numSpaces) : ns(numSpaces) {}
+ Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); }
+ void print(raw_ostream &os) const;
+ private:
+ unsigned ns;
+ };
+
+ Spacer s(unsigned ns) const;
+
+ template <typename Iterator>
+ std::string escapeChars(Iterator sBegin, Iterator sEnd) const;
+
+ /// \brief Render a machine instruction.
+ void renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const;
+
+ /// \brief Render vertical text.
+ template <typename T>
+ void renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const;
+
+ /// \brief Insert CSS layout info.
+ void insertCSS(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a brief summary of the function (including rendering
+ /// context).
+ void renderFunctionSummary(const Spacer &indent,
+ raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ /// \brief Render a legend for the pressure table.
+ void renderPressureTableLegend(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a consecutive set of HTML cells of the same class using
+ /// the colspan attribute for run-length encoding.
+ template <typename CellType>
+ void renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const;
+
+ /// \brief Render code listing, potentially with register pressure
+ /// and live intervals shown alongside.
+ void renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render the HTML page representing the MachineFunction.
+ void renderFunctionPage(raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ std::string escapeChars(const std::string &s) const;
+ };
+}
+
+#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 09202f84cb29..ea93dd5c6663 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -32,7 +32,8 @@ using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
+ Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
MFI = mf.getFrameInfo();
DbgValueVec.clear();
}
@@ -159,8 +160,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
// Keep track of dangling debug references to registers.
- std::pair<MachineInstr*, unsigned>
- DanglingDebugValue[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<std::pair<MachineInstr*, unsigned> >
+ DanglingDebugValue(TRI->getNumRegs(),
+ std::make_pair(static_cast<MachineInstr*>(0), 0));
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
@@ -172,7 +174,6 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValueVec.clear();
- std::memset(DanglingDebugValue, 0, sizeof(DanglingDebugValue));
// Walk the list of instructions, from bottom moving up.
for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index d90659bb163e..c8f543f7146d 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -106,8 +106,8 @@ namespace llvm {
/// are as we iterate upward through the instructions. This is allocated
/// here instead of inside BuildSchedGraph to avoid the need for it to be
/// initialized and destructed for each block.
- std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
- std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<std::vector<SUnit *> > Defs;
+ std::vector<std::vector<SUnit *> > Uses;
/// DbgValueVec - Remember DBG_VALUEs that refer to a particular
/// register.
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e67175246457..c9c4d91e9736 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4489,6 +4489,16 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ DstEltVT, BV->getOperand(0)));
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -4500,8 +4510,6 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- BV->getValueType(0).getVectorNumElements());
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
&Ops[0], Ops.size());
}
@@ -5790,7 +5798,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
SDValue N0 = Value.getOperand(0);
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
if (LD->getBasePtr() != Ptr)
return SDValue();
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index decaa769e99f..a4eed71e65c0 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -94,7 +94,7 @@ bool FastISel::hasTrivialKill(const Value *V) const {
!(I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::PtrToInt ||
I->getOpcode() == Instruction::IntToPtr) &&
- cast<Instruction>(I->use_begin())->getParent() == I->getParent();
+ cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
}
unsigned FastISel::getRegForValue(const Value *V) {
@@ -146,7 +146,7 @@ unsigned FastISel::getRegForValue(const Value *V) {
return Reg;
}
-/// materializeRegForValue - Helper for getRegForVale. This function is
+/// materializeRegForValue - Helper for getRegForValue. This function is
/// called when the value isn't already available in a register and must
/// be materialized with new instructions.
unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
@@ -276,6 +276,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
void FastISel::recomputeInsertPt() {
if (getLastLocalValue()) {
FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
++FuncInfo.InsertPt;
} else
FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
@@ -472,17 +473,7 @@ bool FastISel::SelectCall(const User *I) {
return true;
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
// Don't handle byval struct arguments or VLAs, for example.
- // Note that if we have a byval struct argument, fast ISel is turned off;
- // those are handled in SelectionDAGBuilder.
- if (AI) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs.
- int FI = SI->second;
- if (!DI->getDebugLoc().isUnknown())
- FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(),
- FI, DI->getDebugLoc());
- } else
+ if (!AI)
// Building the map above is target independent. Generating DBG_VALUE
// inline is target dependent; do this now.
(void)TargetSelectInstruction(cast<Instruction>(I));
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 928e1ecd4cf4..5ef6404ee5d6 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -111,17 +112,56 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ // The object may need to be placed onto the stack near the stack
+ // protector if one exists. Determine here if this object is a suitable
+ // candidate. I.e., it would trigger the creation of a stack protector.
+ bool MayNeedSP =
+ (AI->isArrayAllocation() ||
+ (TySize > 8 && isa<ArrayType>(Ty) &&
+ cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
}
for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
if (!isa<AllocaInst>(I) ||
!StaticAllocaMap.count(cast<AllocaInst>(I)))
InitializeRegForValue(I);
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ if (MMI.hasDebugInfo() &&
+ DIVariable(DI->getVariable()).Verify() &&
+ !DI->getDebugLoc().isUnknown()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
+ }
+
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
@@ -181,6 +221,7 @@ void FunctionLoweringInfo::clear() {
#endif
LiveOutRegInfo.clear();
ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
RegFixups.clear();
}
@@ -214,6 +255,28 @@ unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
return FirstReg;
}
+/// setByValArgumentFrameIndex - Record frame index for the byval
+/// argument. This overrides previous frame index entry for this argument,
+/// if any.
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
+ int FI) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getByValArgumentFrameIndex - Get frame index for the byval argument.
+/// If the argument does not have any assigned frame index then 0 is
+/// returned.
+int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ return 0;
+}
+
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7a47da4ec52e..2981cd3f1cab 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -100,8 +100,7 @@ public:
/// it is already legal or we need to expand it into multiple registers of
/// smaller integer type, or we need to promote it to a larger type.
LegalizeAction getTypeAction(EVT VT) const {
- return
- (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT);
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
}
/// isTypeLegal - Return true if this type is legal on this target.
@@ -1314,21 +1313,30 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
break;
case TargetLowering::Expand:
- // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
- // f128 = EXTLOAD {f32,f64} too
- if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 ||
- Node->getValueType(0) == MVT::f128)) ||
- (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
- Result = DAG.getNode(ISD::FP_EXTEND, dl,
- Node->getValueType(0), Load);
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
assert(ExtType != ISD::EXTLOAD &&
"EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b94ea9a3a9af..f8c589071921 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -234,8 +234,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
- JoinIntegers(N->getOperand(0), N->getOperand(1)));
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
@@ -245,7 +246,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
// Zero extend things like i1, sign extend everything else. It shouldn't
// matter in theory which one we pick, but this tends to give better code?
unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
SDValue(N, 0));
assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
return Result;
@@ -310,8 +312,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
- // and SINT conversions are Custom, there is no way to tell which is preferable.
- // We choose SINT because that's the right thing on PPC.)
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
if (N->getOpcode() == ISD::FP_TO_UINT &&
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
@@ -1030,7 +1032,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Hi = InL;
} else if (Amt == 1 &&
TLI.isOperationLegalOrCustom(ISD::ADDC,
- TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
// Emit this X << 1 as X+X.
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
SDValue LoOps[2] = { InL, InL };
@@ -1926,7 +1928,8 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
unsigned ExcessBits =
EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
- DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
}
}
@@ -2046,7 +2049,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
unsigned ExcessBits =
Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getZeroExtendInReg(Hi, dl,
- EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bd86694446d6..d56029208e61 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -75,7 +75,7 @@ private:
/// getTypeAction - Return how we should legalize values of this type.
LegalizeAction getTypeAction(EVT VT) const {
- switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) {
+ switch (ValueTypeActions.getTypeAction(VT)) {
default:
assert(false && "Unknown legalize action!");
case TargetLowering::Legal:
@@ -86,8 +86,7 @@ private:
// 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
if (!VT.isVector())
return PromoteInteger;
- else
- return WidenVector;
+ return WidenVector;
case TargetLowering::Expand:
// Expand can mean
// 1) split scalar in half, 2) convert a float to an integer,
@@ -95,23 +94,21 @@ private:
if (!VT.isVector()) {
if (VT.isInteger())
return ExpandInteger;
- else if (VT.getSizeInBits() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
+ if (VT.getSizeInBits() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
return SoftenFloat;
- else
- return ExpandFloat;
- } else if (VT.getVectorNumElements() == 1) {
- return ScalarizeVector;
- } else {
- return SplitVector;
+ return ExpandFloat;
}
+
+ if (VT.getVectorNumElements() == 1)
+ return ScalarizeVector;
+ return SplitVector;
}
}
/// isTypeLegal - Return true if this type is legal on this target.
bool isTypeLegal(EVT VT) const {
- return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) ==
- TargetLowering::Legal);
+ return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
}
/// IgnoreNodeResults - Pretend all of this node's results are legal.
@@ -584,6 +581,7 @@ private:
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93aeff5c1e6c..93bc2d04928e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -983,6 +983,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -1091,8 +1092,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts,
- Idx.getValueType())),
- 0);
+ Idx.getValueType())), 0);
}
// Store the vector to the stack.
@@ -1113,7 +1113,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
@@ -1132,25 +1132,49 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
LoMemVT, isVol, isNT, Alignment);
else
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
isVol, isNT, Alignment);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
SVOffset += IncrementSize;
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
HiMemVT, isVol, isNT, Alignment);
else
- Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
isVol, isNT, Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands all must have the same type, and we know the result the
+ // result type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-two vectors, we could convert this to
+ // a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
}
@@ -1274,8 +1298,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
@@ -1283,124 +1307,123 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
- } else if (NumElts == 1) {
- // No legal vector version so unroll the vector operation and then widen.
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
- } else {
- // Since the operation can trap, apply operation on the original vector.
- EVT MaxVT = VT;
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
-
- SmallVector<SDValue, 16> ConcatOps(CurNumElts);
- unsigned ConcatEnd = 0; // Current ConcatOps index.
- int Idx = 0; // Current Idx into input vectors.
-
- // NumElts := greatest synthesizable vector size (at most WidenVT)
- // while (orig. vector has unhandled elements) {
- // take munches of size NumElts from the beginning and add to ConcatOps
- // NumElts := next smaller supported vector size or 1
- // }
- while (CurNumElts != 0) {
- while (CurNumElts >= NumElts) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getIntPtrConstant(Idx));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
- Idx += NumElts;
- CurNumElts -= NumElts;
- }
- do {
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
-
- if (NumElts == 1) {
- for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp1, DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp2, DAG.getIntPtrConstant(Idx));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
- }
- CurNumElts = 0;
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
}
+ CurNumElts = 0;
}
+ }
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
- // while (Some element of ConcatOps is not of type MaxVT) {
- // From the end of ConcatOps, collect elements of the same type and put
- // them into an op of the next larger supported type
- // }
- while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
- VT = ConcatOps[Idx--].getValueType();
- while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
- Idx--;
-
- int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
- EVT NextVT;
- do {
- NextSize *= 2;
- NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeSynthesizable(NextVT));
-
- if (!VT.isVector()) {
- // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
- SDValue VecOp = DAG.getUNDEF(NextVT);
- unsigned NumToInsert = ConcatEnd - Idx - 1;
- for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
- ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
- }
- ConcatOps[Idx+1] = VecOp;
- ConcatEnd = Idx + 2;
- }
- else {
- // Vector type, create a CONCAT_VECTORS of type NextVT
- SDValue undefVec = DAG.getUNDEF(VT);
- unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
- SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
- unsigned RealVals = ConcatEnd - Idx - 1;
- unsigned SubConcatEnd = 0;
- unsigned SubConcatIdx = Idx + 1;
- while (SubConcatEnd < RealVals)
- SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
- while (SubConcatEnd < OpsToConcat)
- SubConcatOps[SubConcatEnd++] = undefVec;
- ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, &SubConcatOps[0],
- OpsToConcat);
- ConcatEnd = SubConcatIdx + 1;
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeSynthesizable(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
}
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
}
+ }
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
-
- // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
- unsigned NumOps =
- WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
- if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(MaxVT);
- for (unsigned j = ConcatEnd; j < NumOps; ++j)
- ConcatOps[j] = UndefVal;
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
}
+
+ // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd ) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
@@ -1561,8 +1584,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
- NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT,
- WidenSize / InEltVT.getSizeInBits());
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
} else {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
@@ -1686,8 +1709,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue RndOp = N->getOperand(3);
SDValue SatOp = N->getOperand(4);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
- N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
@@ -1720,9 +1742,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SmallVector<SDValue, 16> Ops(NumConcat);
Ops[0] = InOp;
SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i) {
+ for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- }
+
InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
@@ -2225,25 +2247,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Check if we can load the element with one instruction
if (LdWidth <= NewVTWidth) {
- if (NewVT.isVector()) {
- if (NewVT != WidenVT) {
- assert(WidenWidth % NewVTWidth == 0);
- unsigned NumConcat = WidenWidth / NewVTWidth;
- SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(NewVT);
- ConcatOps[0] = LdOp;
- for (unsigned i = 1; i != NumConcat; ++i)
- ConcatOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
- NumConcat);
- } else
- return LdOp;
- } else {
+ if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
}
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
}
// Load vector by using multiple loads from largest vector to scalar
@@ -2276,52 +2297,55 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Build the vector from the loads operations
unsigned End = LdOps.size();
- if (LdOps[0].getValueType().isVector()) {
- // If the load contains vectors, build the vector using concat vector.
- // All of the vectors used to loads are power of 2 and the scalars load
- // can be combined to make a power of 2 vector.
- SmallVector<SDValue, 16> ConcatOps(End);
- int i = End - 1;
- int Idx = End;
- EVT LdTy = LdOps[i].getValueType();
- // First combine the scalar loads to a vector
- if (!LdTy.isVector()) {
- for (--i; i >= 0; --i) {
- LdTy = LdOps[i].getValueType();
- if (LdTy.isVector())
- break;
- }
- ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
- }
- ConcatOps[--Idx] = LdOps[i];
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the load contains vectors, build the vector using concat vector.
+ // All of the vectors used to loads are power of 2 and the scalars load
+ // can be combined to make a power of 2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
for (--i; i >= 0; --i) {
- EVT NewLdTy = LdOps[i].getValueType();
- if (NewLdTy != LdTy) {
- // Create a larger vector
- ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
- &ConcatOps[Idx], End - Idx);
- Idx = End - 1;
- LdTy = NewLdTy;
- }
- ConcatOps[--Idx] = LdOps[i];
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
}
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
- if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
- // We need to fill the rest with undefs to build the vector
- unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
- SmallVector<SDValue, 16> WidenOps(NumOps);
- SDValue UndefVal = DAG.getUNDEF(LdTy);
- unsigned i = 0;
- for (; i != End-Idx; ++i)
- WidenOps[i] = ConcatOps[Idx+i];
- for (; i != NumOps; ++i)
- WidenOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
- } else
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- &ConcatOps[Idx], End - Idx);
- } else // All the loads are scalar loads.
- return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
}
SDValue
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3b86c3286585..fae27294e364 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -432,6 +433,30 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
return N->getValueType(NumRes);
}
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -446,37 +471,44 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
- unsigned Reg = I->getReg();
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
- if (RegAdded.insert(Reg))
- LRegs.push_back(Reg);
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg);
- *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
- if (RegAdded.insert(*Alias))
- LRegs.push_back(*Alias);
- }
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
}
}
for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
if (!Node->isMachineOpcode())
continue;
const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
if (!TID.ImplicitDefs)
continue;
for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
- if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
- if (RegAdded.insert(*Reg))
- LRegs.push_back(*Reg);
- }
- for (const unsigned *Alias = TRI->getAliasSet(*Reg);
- *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias))
- LRegs.push_back(*Alias);
- }
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
return !LRegs.empty();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3ef521c398e1..4c3e4e3b0768 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -54,10 +55,16 @@ static RegisterScheduler
static RegisterScheduler
hybridListDAGScheduler("list-hybrid",
- "Bottom-up rr list scheduling which avoid stalls for "
- "long latency instructions",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
createHybridListDAGScheduler);
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -181,7 +188,9 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -273,6 +282,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU);
+ AvailableQueue->ScheduledNode(SU);
+
ReleasePredecessors(SU, CurCycle);
// Release all the implicit physical register defs that are live.
@@ -291,7 +302,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
}
SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -315,8 +325,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
DEBUG(SU->dump(this));
- AvailableQueue->UnscheduledNode(SU);
-
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
@@ -346,6 +354,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SU->isScheduled = false;
SU->isAvailable = true;
AvailableQueue->push(SU);
+ AvailableQueue->UnscheduledNode(SU);
}
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
@@ -956,7 +965,8 @@ namespace {
template<class SF>
class RegReductionPriorityQueue;
- /// Sorting functions for the Available queue.
+ /// bu_ls_rr_sort - Priority function for bottom up register pressure
+ // reduction scheduler.
struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
@@ -965,6 +975,8 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+ // td_ls_rr_sort - Priority function for top down register pressure reduction
+ // scheduler.
struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
@@ -973,6 +985,7 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+ // src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
@@ -983,13 +996,26 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+ // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
: SPQ(spq) {}
hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
: SPQ(RHS.SPQ) {}
-
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+ // scheduler.
+ struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
+ ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
bool operator()(const SUnit* left, const SUnit* right) const;
};
} // end anonymous namespace
@@ -1029,23 +1055,48 @@ namespace {
std::vector<SUnit*> Queue;
SF Picker;
unsigned CurQueueId;
+ bool TracksRegPressure;
protected:
// SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits;
-
+
+ MachineFunction &MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
ScheduleDAGRRList *scheduleDAG;
// SethiUllmanNumbers - The SethiUllman number for each node.
std::vector<unsigned> SethiUllmanNumbers;
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
public:
- RegReductionPriorityQueue(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri)
- : Picker(this), CurQueueId(0),
- TII(tii), TRI(tri), scheduleDAG(NULL) {}
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+ }
+ }
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
@@ -1072,6 +1123,7 @@ namespace {
void releaseState() {
SUnits = 0;
SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
}
unsigned getNodePriority(const SUnit *SU) const {
@@ -1139,10 +1191,244 @@ namespace {
SU->NodeQueueId = 0;
}
+ bool HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ // Check if this increases register pressure of the specific register
+ // class to the point where it would cause spills.
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ // Check if this increases register pressure of the specific register
+ // class to the point where it would cause spills.
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true; // Reg pressure already high.
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ // Check if this increases register pressure of the specific register
+ // class to the point where it would cause spills.
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ void ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+ }
+
+ void UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+ }
+
void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
scheduleDAG = scheduleDag;
}
+ void dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+ }
+
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
void AddPseudoTwoAddrDeps();
@@ -1161,6 +1447,9 @@ namespace {
typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;
+
+ typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ ILPBURRPriorityQueue;
}
/// closestSucc - Returns the scheduled cycle of the successor which is
@@ -1260,30 +1549,63 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
}
bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
- bool LStall = left->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < left->getHeight();
- bool RStall = right->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < right->getHeight();
- // If scheduling one of the node will cause a pipeline stall, delay it.
- // If scheduling either one of the node will cause a pipeline stall, sort them
- // according to their height.
- // If neither will cause a pipeline stall, try to reduce register pressure.
- if (LStall) {
- if (!RStall)
- return true;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- } else if (RStall)
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh)
+ return true;
+ else if (!LHigh && RHigh)
+ return false;
+ else if (!LHigh && !RHigh) {
+ // Low register pressure situation, schedule for latency if possible.
+ bool LStall = left->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < left->getHeight();
+ bool RStall = right->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < right->getHeight();
+ // If scheduling one of the node will cause a pipeline stall, delay it.
+ // If scheduling either one of the node will cause a pipeline stall, sort
+ // them according to their height.
+ // If neither will cause a pipeline stall, try to reduce register pressure.
+ if (LStall) {
+ if (!RStall)
+ return true;
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ } else if (RStall)
return false;
- // If either node is scheduling for latency, sort them by height and latency
- // first.
- if (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency) {
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- if (left->Latency != right->Latency)
- return left->Latency > right->Latency;
+ // If either node is scheduling for latency, sort them by height and latency
+ // first.
+ if (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency) {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency;
+ }
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
+bool ilp_ls_rr_sort::operator()(const SUnit *left,
+ const SUnit *right) const {
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh)
+ return true;
+ else if (!LHigh && RHigh)
+ return false;
+ else if (!LHigh && !RHigh) {
+ // Low register pressure situation, schedule to maximize instruction level
+ // parallelism.
+ if (left->NumPreds > right->NumPreds)
+ return false;
+ else if (left->NumPreds < right->NumPreds)
+ return false;
}
return BURRSort(left, right, SPQ);
@@ -1635,8 +1957,8 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
-
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
@@ -1648,8 +1970,8 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
-
+ TDRegReductionPriorityQueue *PQ =
+ new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
@@ -1661,8 +1983,8 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
-
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
@@ -1673,9 +1995,24 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
- HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
PQ->setScheduleDAG(SD);
return SD;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 06cf05308755..f1bf82ab145a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -59,8 +59,9 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
SUnits.back().OrigNode = &SUnits.back();
SUnit *SU = &SUnits.back();
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- if (N->isMachineOpcode() &&
- N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
SU->SchedulingPref = Sched::None;
else
SU->SchedulingPref = TLI.getSchedulingPreference(N);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e83a0346b535..ad06ebda5b00 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2236,7 +2236,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (FiniteOnlyFPMath())
+ if (NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2281,35 +2281,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
}
-/// getShuffleScalarElt - Returns the scalar element that will make up the ith
-/// element of the result of the vector shuffle.
-SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
- unsigned i) {
- EVT VT = N->getValueType(0);
- if (N->getMaskElt(i) < 0)
- return getUNDEF(VT.getVectorElementType());
- unsigned Index = N->getMaskElt(i);
- unsigned NumElems = VT.getVectorNumElements();
- SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
- Index %= NumElems;
-
- if (V.getOpcode() == ISD::BIT_CONVERT) {
- V = V.getOperand(0);
- EVT VVT = V.getValueType();
- if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
- return SDValue();
- }
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return (Index == 0) ? V.getOperand(0)
- : getUNDEF(VT.getVectorElementType());
- if (V.getOpcode() == ISD::BUILD_VECTOR)
- return V.getOperand(Index);
- if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
- return getShuffleScalarElt(SVN, Index);
- return SDValue();
-}
-
-
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
@@ -2624,7 +2595,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
}
@@ -3021,7 +2993,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR &&
N3.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
@@ -5872,6 +5845,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() {
void SDNode::dump() const { dump(0); }
void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
+ dbgs() << '\n';
}
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
@@ -5895,7 +5869,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
e = MN->memoperands_end(); i != e; ++i) {
OS << **i;
- if (next(i) != e)
+ if (llvm::next(i) != e)
OS << " ";
}
OS << ">";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 458e865a6b3c..e65744592c8b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -70,22 +70,29 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT);
+
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger then ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
if (NumParts > 1) {
// Assemble the value from multiple parts.
- if (!ValueVT.isVector() && ValueVT.isInteger()) {
+ if (ValueVT.isInteger()) {
unsigned PartBits = PartVT.getSizeInBits();
unsigned ValueBits = ValueVT.getSizeInBits();
@@ -100,25 +107,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
- Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT);
- Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]);
}
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
- Hi = getCopyFromParts(DAG, dl,
+ Hi = getCopyFromParts(DAG, DL,
Parts + RoundParts, OddParts, PartVT, OddVT);
// Combine the round and odd parts.
@@ -126,68 +133,29 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
if (TLI.isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
- Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
DAG.getConstant(Lo.getValueType().getSizeInBits(),
TLI.getPointerTy()));
- Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
- Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
- }
- } else if (ValueVT.isVector()) {
- // Handle a multi-element vector.
- EVT IntermediateVT, RegisterVT;
- unsigned NumIntermediates;
- unsigned NumRegs =
- TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
- NumIntermediates, RegisterVT);
- assert(NumRegs == NumParts
- && "Part count doesn't match vector breakdown!");
- NumParts = NumRegs; // Silence a compiler warning.
- assert(RegisterVT == PartVT
- && "Part type doesn't match vector breakdown!");
- assert(RegisterVT == Parts[0].getValueType() &&
- "Part type doesn't match part!");
-
- // Assemble the parts into intermediate operands.
- SmallVector<SDValue, 8> Ops(NumIntermediates);
- if (NumIntermediates == NumParts) {
- // If the register was not expanded, truncate or copy the value,
- // as appropriate.
- for (unsigned i = 0; i != NumParts; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
- PartVT, IntermediateVT);
- } else if (NumParts > 0) {
- // If the intermediate type was expanded, build the intermediate
- // operands from the parts.
- assert(NumParts % NumIntermediates == 0 &&
- "Must expand into a divisible number of parts!");
- unsigned Factor = NumParts / NumIntermediates;
- for (unsigned i = 0; i != NumIntermediates; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
- PartVT, IntermediateVT);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
-
- // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
- // intermediate operands.
- Val = DAG.getNode(IntermediateVT.isVector() ?
- ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
- ValueVT, &Ops[0], NumIntermediates);
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
"Unexpected split");
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
}
}
@@ -197,219 +165,315 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
if (PartVT == ValueVT)
return Val;
- if (PartVT.isVector()) {
- assert(ValueVT.isVector() && "Unknown vector conversion!");
- return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
- }
-
- if (ValueVT.isVector()) {
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
- "Only trivial scalar-to-vector conversions should get here!");
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
- }
-
- if (PartVT.isInteger() &&
- ValueVT.isInteger()) {
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
if (AssertOp != ISD::DELETED_NODE)
- Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+ Val = DAG.getNode(AssertOp, DL, PartVT, Val,
DAG.getValueType(ValueVT));
- return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
- } else {
- return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
- if (ValueVT.bitsLT(Val.getValueType())) {
- // FP_ROUND's are always exact here.
- return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getIntPtrConstant(1));
- }
- return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
- return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+ return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
return SDValue();
}
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent. If the parts combine to a type
+/// larger then ValueVT then AssertOp can be used to specify whether the extra
+/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
+/// (ISD::AssertSext).
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+ ValueVT, &Ops[0], NumIntermediates);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
+ return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ }
+
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+
+
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT);
+
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT PtrVT = TLI.getPointerTy();
EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
- if (!NumParts)
+ if (NumParts == 0)
return;
- if (!ValueVT.isVector()) {
- if (PartVT == ValueVT) {
- assert(NumParts == 1 && "No-op copy with multiple parts!");
- Parts[0] = Val;
- return;
- }
-
- if (NumParts * PartBits > ValueVT.getSizeInBits()) {
- // If the parts cover more bits than the value has, promote the value.
- if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
- assert(NumParts == 1 && "Do not know what to promote to!");
- Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
- } else if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
- } else {
- llvm_unreachable("Unknown mismatch!");
- }
- } else if (PartBits == ValueVT.getSizeInBits()) {
- // Different types of the same size.
- assert(NumParts == 1 && PartVT != ValueVT);
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
- } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
- // If the parts cover less bits than value has, truncate the value.
- if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
- } else {
- llvm_unreachable("Unknown mismatch!");
- }
- }
-
- // The value may have changed - recompute ValueVT.
- ValueVT = Val.getValueType();
- assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
- "Failed to tile the value with PartVT!");
-
- if (NumParts == 1) {
- assert(PartVT == ValueVT && "Type conversion failed!");
- Parts[0] = Val;
- return;
- }
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
- // Expand the value into multiple parts.
- if (NumParts & (NumParts - 1)) {
- // The number of parts is not a power of 2. Split off and copy the tail.
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
assert(PartVT.isInteger() && ValueVT.isInteger() &&
- "Do not know what to expand to!");
- unsigned RoundParts = 1 << Log2_32(NumParts);
- unsigned RoundBits = RoundParts * PartBits;
- unsigned OddParts = NumParts - RoundParts;
- SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
- DAG.getConstant(RoundBits,
- TLI.getPointerTy()));
- getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
- OddParts, PartVT);
-
- if (TLI.isBigEndian())
- // The odd parts were reversed by getCopyToParts - unreverse them.
- std::reverse(Parts + RoundParts, Parts + NumParts);
-
- NumParts = RoundParts;
+ "Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
}
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+ // If the parts cover less bits than value has, truncate the value.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
- // The number of parts is a power of 2. Repeatedly bisect the value using
- // EXTRACT_ELEMENT.
- Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
- EVT::getIntegerVT(*DAG.getContext(),
- ValueVT.getSizeInBits()),
- Val);
-
- for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
- for (unsigned i = 0; i < NumParts; i += StepSize) {
- unsigned ThisBits = StepSize * PartBits / 2;
- EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
- SDValue &Part0 = Parts[i];
- SDValue &Part1 = Parts[i+StepSize/2];
-
- Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- ThisVT, Part0,
- DAG.getConstant(1, PtrVT));
- Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- ThisVT, Part0,
- DAG.getConstant(0, PtrVT));
-
- if (ThisBits == PartBits && ThisVT != PartVT) {
- Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
- PartVT, Part0);
- Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
- PartVT, Part1);
- }
+ if (NumParts == 1) {
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
+ Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1);
}
}
+ }
- if (TLI.isBigEndian())
- std::reverse(Parts, Parts + OrigNumParts);
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
- return;
- }
- // Vector ValueVT.
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
if (NumParts == 1) {
- if (PartVT != ValueVT) {
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
- } else {
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
- "Only trivial vector-to-scalar conversions should get here!");
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- PartVT, Val,
- DAG.getConstant(0, PtrVT));
- }
- }
+ if (PartVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
+ PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
+ // undef elements.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
+ }
+
Parts[0] = Val;
return;
}
-
+
// Handle a multi-element vector.
EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
- IntermediateVT, NumIntermediates, RegisterVT);
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
unsigned NumElements = ValueVT.getVectorNumElements();
-
+
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector())
- Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates),
- PtrVT));
+ DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
else
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- IntermediateVT, Val,
- DAG.getConstant(i, PtrVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
-
+
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each the value into
// legal parts.
@@ -417,10 +481,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
}
}
+
+
+
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
@@ -460,11 +527,6 @@ namespace {
EVT regvt, EVT valuevt)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
- RegsForValue(const SmallVector<unsigned, 4> &regs,
- const SmallVector<EVT, 4> &regvts,
- const SmallVector<EVT, 4> &valuevts)
- : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
-
RegsForValue(LLVMContext &Context, const TargetLowering &tli,
unsigned Reg, const Type *Ty) {
ComputeValueVTs(tli, Ty, ValueVTs);
@@ -530,6 +592,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
SDValue &Chain, SDValue *Flag) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Assemble the legal parts into the final values.
@@ -623,8 +689,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
EVT RegisterVT = RegVTs[Value];
- getCopyToParts(DAG, dl,
- Val.getValue(Val.getResNo() + Value),
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT);
Part += NumParts;
}
@@ -701,6 +766,7 @@ void SelectionDAGBuilder::clear() {
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
+ DanglingDebugInfoMap.clear();
CurDebugLoc = DebugLoc();
HasTailCall = false;
}
@@ -805,6 +871,33 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ MDNode *Variable = DI->getVariable();
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ SDV = DAG.getDbgValue(Variable, Val.getNode(),
+ Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else {
+ SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
+ Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ }
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
@@ -826,6 +919,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
return Val;
}
@@ -839,10 +933,11 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
return Val;
}
-/// getValueImpl - Helper function for getValue and getMaterializedValue.
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const Constant *C = dyn_cast<Constant>(V)) {
@@ -986,10 +1081,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
- EVT PtrVT = PtrValueVTs[0];
for (unsigned i = 0; i != NumValues; ++i) {
- SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
- DAG.getConstant(Offsets[i], PtrVT));
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i]));
Chains[i] =
DAG.getStore(Chain, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
@@ -2709,11 +2804,6 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
Ty = StTy->getElementType(Field);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
-
- // Offset canonically 0 for unions, but type changes
- Ty = UnTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
@@ -2818,7 +2908,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Inform the Frame Information that we have just allocated a variable-sized
// object.
- FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
@@ -3824,11 +3914,11 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
bool
-SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
- const Value *V, MDNode *Variable,
- uint64_t Offset,
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset,
const SDValue &N) {
- if (!isa<Argument>(V))
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -3842,7 +3932,15 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
return false;
unsigned Reg = 0;
- if (N.getOpcode() == ISD::CopyFromReg) {
+ if (Arg->hasByValAttr()) {
+ // Byval arguments' frame index is recorded during argument lowering.
+ // Use this info directly.
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ }
+
+ if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
@@ -3966,42 +4064,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
- if (!DIVariable(DI.getVariable()).Verify())
- return 0;
-
MDNode *Variable = DI.getVariable();
- // Parameters are handled specially.
- bool isParameter =
- DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
const Value *Address = DI.getAddress();
- if (!Address)
+ if (!Address || !DIVariable(DI.getVariable()).Verify())
return 0;
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
- Address = BCI->getOperand(0);
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
- if (AI) {
- // Don't handle byval arguments or VLAs, for example.
- // Non-byval arguments are handled here (they refer to the stack temporary
- // alloca at this point).
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI == FuncInfo.StaticAllocaMap.end())
- return 0; // VLAs.
- int FI = SI->second;
-
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
- MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
- }
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
// but do not always have a corresponding SDNode built. The SDNodeOrder
// absolute, but not relative, values are different depending on whether
// debug info exists.
++SDNodeOrder;
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ SDDbgValue*SDV =
+ DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+
SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
+ // Parameters are handled specially.
+ bool isParameter =
+ DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
if (isParameter && !AI) {
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (FINode)
@@ -4020,10 +4116,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
- // This isn't useful, but it shows what we're missing.
- SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, isParameter);
+ // If Address is an arugment then try to emits its dbg value using
+ // virtual register info from the FuncInfo.ValueMap. Otherwise add undef
+ // to help track missing debug info.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+ SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ }
}
return 0;
}
@@ -4048,31 +4148,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, 0, false);
} else {
- bool createUndef = false;
- // FIXME : Why not use getValue() directly ?
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
SDValue N = NodeMap[V];
if (!N.getNode() && isa<Argument>(V))
// Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
SDV = DAG.getDbgValue(Variable, N.getNode(),
N.getResNo(), Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
- } else if (isa<PHINode>(V) && !V->use_empty()) {
- SDValue N = getValue(V);
- if (N.getNode()) {
- if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
- SDV = DAG.getDbgValue(Variable, N.getNode(),
- N.getResNo(), Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, N.getNode(), false);
- }
- } else
- createUndef = true;
- } else
- createUndef = true;
- if (createUndef) {
+ } else if (isa<PHINode>(V) && !V->use_empty() ) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter; we need this fallback.
SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
@@ -4572,6 +4665,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
!isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
isTailCall = false;
+ // If there's a possibility that fast-isel has already selected some amount
+ // of the current basic block, don't emit a tail call.
+ if (isTailCall && EnableFastISel)
+ isTailCall = false;
+
std::pair<SDValue,SDValue> Result =
TLI.LowerCallTo(getRoot(), RetTy,
CS.paramHasAttr(0, Attribute::SExt),
@@ -6054,6 +6152,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
i += NumParts;
}
+ // Note down frame index for byval arguments.
+ if (I->hasByValAttr() && !ArgValues.empty())
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+
if (!I->use_empty()) {
SDValue Res;
if (!ArgValues.empty())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 46733d6db124..5f400e9c83ac 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,9 +18,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
@@ -64,6 +61,7 @@ class PHINode;
class PtrToIntInst;
class ReturnInst;
class SDISelAsmOperandInfo;
+class SDDbgValue;
class SExtInst;
class SelectInst;
class ShuffleVectorInst;
@@ -93,6 +91,24 @@ class SelectionDAGBuilder {
/// to preserve debug information for incoming arguments.
DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do see it.
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
public:
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
/// them up and then emit token factor nodes when possible. This allows us to
@@ -345,6 +361,9 @@ public:
void visit(unsigned Opcode, const User &I);
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
SDValue getValue(const Value *V);
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -506,13 +525,11 @@ private:
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
- /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a
- /// function argument, create the corresponding DBG_VALUE machine instruction
- /// for it now. At the end of instruction selection, they will be inserted to
- /// the entry BB.
- bool EmitFuncArgumentDbgValue(const DbgValueInst &DI,
- const Value *V, MDNode *Variable,
- uint64_t Offset, const SDValue &N);
+ /// EmitFuncArgumentDbgValue - If V is an function argument then create
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, they will be inserted to the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset, const SDValue &N);
};
} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 08ba5482f7d2..66cb5ceb09e5 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -132,14 +132,16 @@ namespace llvm {
const TargetLowering &TLI = IS->getTargetLowering();
if (OptLevel == CodeGenOpt::None)
- return createFastDAGScheduler(IS, OptLevel);
+ return createSourceListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::Latency)
return createTDListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
- assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
+ if (TLI.getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
- return createHybridListDAGScheduler(IS, OptLevel);
+ return createILPListDAGScheduler(IS, OptLevel);
}
}
@@ -169,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
//===----------------------------------------------------------------------===//
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
- MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+ MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
@@ -216,7 +218,7 @@ static bool FunctionCallsSetJmp(const Function *F) {
for (Value::const_use_iterator
I = Callee->use_begin(), E = Callee->use_end();
I != E; ++I)
- if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const CallInst *CI = dyn_cast<CallInst>(*I))
if (CI->getParent()->getParent() == F)
return true;
}
@@ -362,38 +364,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
CodeGenAndEmitDAG();
}
-namespace {
-/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
-/// nodes from the worklist.
-class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
- SmallVector<SDNode*, 128> &Worklist;
- SmallPtrSet<SDNode*, 128> &InWorklist;
-public:
- SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl,
- SmallPtrSet<SDNode*, 128> &inwl)
- : Worklist(wl), InWorklist(inwl) {}
-
- void RemoveFromWorklist(SDNode *N) {
- if (!InWorklist.erase(N)) return;
-
- SmallVector<SDNode*, 128>::iterator I =
- std::find(Worklist.begin(), Worklist.end(), N);
- assert(I != Worklist.end() && "Not in worklist");
-
- *I = Worklist.back();
- Worklist.pop_back();
- }
-
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
- RemoveFromWorklist(N);
- }
-
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
-};
-}
-
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 6cae804422ce..8313de5e32bb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -199,7 +199,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
#else
errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
<< " on systems with Graphviz or gv!\n";
- return std::string("");
+ return std::string();
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4f3866956cac..b74f600cfa2d 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -651,6 +651,53 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
return NumVectorRegs;
}
+/// isLegalRC - Return true if the value types that can be represented by the
+/// specified register class are all legal.
+bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
+
+/// hasLegalSuperRegRegClasses - Return true if the specified register class
+/// has one or more super-reg register classes that are legal.
+bool
+TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{
+ if (*RC->superregclasses_begin() == 0)
+ return false;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (isLegalRC(RRC))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLowering::findRepresentativeClass(EVT VT) const {
+ const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+ const TargetRegisterClass *BestRC = RC;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (RRC->isASubClass() || !isLegalRC(RRC))
+ continue;
+ if (!hasLegalSuperRegRegClasses(RRC))
+ return std::make_pair(RRC, 1);
+ BestRC = RRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
+
+
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
@@ -736,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1) {
+ bool IsLegalWiderType = false;
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeSynthesizable(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
@@ -744,32 +813,29 @@ void TargetLowering::computeRegisterProperties() {
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
- // Determine if there is a legal wider type.
- bool IsLegalWiderType = false;
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts && NElts != 1) {
- TransformToType[i] = SVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- IsLegalWiderType = true;
- break;
- }
- }
- if (!IsLegalWiderType) {
- EVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- ValueTypeActions.setTypeAction(VT, Expand);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- }
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
}
}
+
+ // Determine the 'representative' register class for each value type.
+ // An representative register class is the largest (meaning one which is
+ // not a sub-register class / subreg register class) legal register class for
+ // a group of value types. For example, on i386, i8, i16, and i32
+ // representative would be GR32; while on x86_64 it's GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -798,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT &IntermediateVT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
- // Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // we should widen to that legal vector type. This handles things like
+ // <2 x float> -> <4 x float>.
+ if (NumElts != 1 && getTypeAction(VT) == Promote) {
+ RegisterVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterVT)) {
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
unsigned NumVectorRegs = 1;
@@ -828,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
- if (DestVT.bitsLT(NewVT)) {
- // Value is expanded, e.g. i64 -> i16.
+ if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
- } else {
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
- }
- return 1;
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
}
/// Get the EVTs and ArgFlags collections that represent the legalized return
@@ -1308,9 +1383,32 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = InOp.getNode()->getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ if ((APInt::getHighBitsSet(BitWidth,
+ BitWidth - InnerVT.getSizeInBits()) &
+ DemandedMask) == 0 &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy();
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, ShTy));
+ return
+ TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
+ NarrowShl));
+ }
+ }
+
KnownZero <<= SA->getZExtValue();
KnownOne <<= SA->getZExtValue();
// low bits known zero.
@@ -1415,11 +1513,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getScalarType().getSizeInBits()) &
- NewMask;
+ BitWidth - EVT.getScalarType().getSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
- if (NewBits == 0)
+ if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
@@ -1886,12 +1983,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT ExtDstTy = N0.getValueType();
unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
- // If the extended part has any inconsistent bits, it cannot ever
- // compare equal. In other words, they have to be all ones or all
- // zeros.
- APInt ExtBits =
- APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
- if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getConstant(Cond == ISD::SETNE, VT);
SDValue ZextOp;
@@ -2476,7 +2570,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
- C->getDebugLoc(),
+ C ? C->getDebugLoc() : DebugLoc(),
Op.getValueType(), Offs));
return;
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index e69d3e4fa78a..b29ea19835bc 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -59,13 +59,16 @@ DisableCrossClassJoin("disable-cross-class-join",
cl::desc("Avoid coalescing cross register class copies"),
cl::init(false), cl::Hidden);
-static RegisterPass<SimpleRegisterCoalescing>
-X("simple-register-coalescing", "Simple Register Coalescing");
+static cl::opt<bool>
+DisablePhysicalJoin("disable-physical-join",
+ cl::desc("Avoid coalescing physical register copies"),
+ cl::init(false), cl::Hidden);
-// Declare that we implement the RegisterCoalescer interface
-static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X);
+INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer,
+ "simple-register-coalescing", "Simple Register Coalescing",
+ false, false, true);
-const PassInfo *const llvm::SimpleRegisterCoalescingID = &X;
+char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID;
void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -386,16 +389,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
return false;
- bool BHasSubRegs = false;
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
-
- // Abort if the subregisters of IntB.reg have values that are not simply the
+ // Abort if the aliases of IntB.reg have values that are not simply the
// clobbers from the superreg.
- if (BHasSubRegs)
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
- if (li_->hasInterval(*SR) &&
- HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0))
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) &&
+ HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
return false;
// If some of the uses of IntA.reg is already coalesced away, return false.
@@ -412,6 +411,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
}
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI);
+
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
MachineBasicBlock *MBB = DefMI->getParent();
@@ -470,16 +471,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (Extended)
UseMO.setIsKill(false);
}
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (UseMI->isCopy()) {
- if (UseMI->getOperand(0).getReg() != IntB.reg ||
- UseMI->getOperand(0).getSubReg())
- continue;
- } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
- if (DstReg != IntB.reg || DstSubIdx)
- continue;
- } else
+ if (!UseMI->isCopy())
continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
// This copy will become a noop. If it's defining a new val#,
// remove that val# as well. However this live range is being
// extended to the end of the existing live range defined by the copy.
@@ -504,13 +501,13 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Remove val#'s defined by copies that will be coalesced away.
for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
VNInfo *DeadVNI = BDeadValNos[i];
- if (BHasSubRegs) {
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
continue;
- LiveInterval &SRLI = li_->getInterval(*SR);
- if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def))
- SRLI.removeValNo(SRLR->valno);
+ LiveInterval &ASLI = li_->getInterval(*AS);
+ if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def))
+ ASLI.removeValNo(ASLR->valno);
}
}
IntB.removeValNo(BDeadValNos[i]);
@@ -628,14 +625,6 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
if (DefMO.getReg() == li.reg && !DefMO.getSubReg())
DefMO.setIsDead();
}
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- DstReg == li.reg && DstSubIdx == 0) {
- // Last use is itself an identity code.
- int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg,
- false, false, tri_);
- LastUseMI->getOperand(DeadIdx).setIsDead();
- }
return true;
}
@@ -772,16 +761,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
// A PhysReg copy that won't be coalesced can perhaps be rematerialized
// instead.
if (DstIsPhys) {
- unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
- if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
- CopySrcSubIdx, CopyDstSubIdx) &&
- CopySrcSubIdx == 0 && CopyDstSubIdx == 0 &&
- CopySrcReg != CopyDstReg && CopySrcReg == SrcReg &&
- CopyDstReg != DstReg && !JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0,
- UseMI))
- continue;
-
if (UseMI->isCopy() &&
!UseMI->getOperand(1).getSubReg() &&
!UseMI->getOperand(0).getSubReg() &&
@@ -834,28 +813,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
dbgs() << li_->getInstructionIndex(UseMI) << "\t";
dbgs() << *UseMI;
});
-
-
- // After updating the operand, check if the machine instruction has
- // become a copy. If so, update its val# information.
- const TargetInstrDesc &TID = UseMI->getDesc();
- if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2)
- continue;
-
- unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
- if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
- CopySrcSubIdx, CopyDstSubIdx) &&
- CopySrcReg != CopyDstReg &&
- (TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
- allocatableRegs_[CopyDstReg])) {
- LiveInterval &LI = li_->getInterval(CopyDstReg);
- SlotIndex DefIdx =
- li_->getInstructionIndex(UseMI).getDefIndex();
- if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) {
- if (DLR->valno->def == DefIdx)
- DLR->valno->setCopy(UseMI);
- }
- }
}
}
@@ -1082,13 +1039,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false; // Not coalescable.
}
+ if (DisablePhysicalJoin && CP.isPhys()) {
+ DEBUG(dbgs() << "\tPhysical joins disabled.\n");
+ return false;
+ }
+
DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg());
// Enforce policies.
if (CP.isPhys()) {
DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n");
// Only coalesce to allocatable physreg.
- if (!allocatableRegs_[CP.getDstReg()]) {
+ if (!li_->isAllocatable(CP.getDstReg())) {
DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
return false; // Not coalescable.
}
@@ -1137,7 +1099,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// happens.
if (li_->hasInterval(CP.getDstReg()) &&
li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
- mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
++numAborts;
DEBUG(dbgs()
<< "\tPhysical register live interval too complicated, abort!\n");
@@ -1156,7 +1117,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
return true;
- mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
++numAborts;
DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
@@ -1543,21 +1503,19 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
MachineInstr *Inst = MII++;
// If this isn't a copy nor a extract_subreg, we can't join intervals.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- bool isInsUndef = false;
+ unsigned SrcReg, DstReg;
if (Inst->isCopy()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(1).getReg();
} else if (Inst->isSubregToReg()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(2).getReg();
- } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ } else
continue;
bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (isInsUndef ||
- (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()))
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
ImpDefCopies.push_back(CopyRec(Inst, 0));
else if (SrcIsPhys || DstIsPhys)
PhysCopies.push_back(CopyRec(Inst, 0));
@@ -1679,11 +1637,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
MachineInstr *UseMI = Use.getParent();
if (UseMI->isIdentityCopy())
continue;
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == DstReg && SrcSubIdx == DstSubIdx)
- // Ignore identity copies.
- continue;
SlotIndex Idx = li_->getInstructionIndex(UseMI);
// FIXME: Should this be Idx != UseIdx? SlotIndex() will return something
// that compares higher than any other interval.
@@ -1708,10 +1661,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
return NULL;
// Ignore identity copies.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (!MI->isIdentityCopy() &&
- !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == DstReg && SrcSubIdx == DstSubIdx))
+ if (!MI->isIdentityCopy())
for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
MachineOperand &Use = MI->getOperand(i);
if (Use.isReg() && Use.isUse() && Use.getReg() &&
@@ -1747,7 +1697,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
- allocatableRegs_ = tri_->getAllocatableSet(fn);
for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
E = tri_->regclass_end(); I != E; ++I)
allocatableRCRegs_.insert(std::make_pair(*I,
@@ -1775,30 +1724,35 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
mii != mie; ) {
MachineInstr *MI = mii;
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (JoinedCopies.count(MI)) {
// Delete all coalesced copies.
bool DoDelete = true;
- if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert(MI->isCopyLike() && "Unrecognized copy instruction");
- SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
- // Do not delete extract_subreg, insert_subreg of physical
- // registers unless the definition is dead. e.g.
- // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
- // or else the scavenger may complain. LowerSubregs will
- // delete them later.
- DoDelete = false;
- }
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
+ unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ MI->getNumOperands() > 2)
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead. e.g.
+ // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // delete them later.
+ DoDelete = false;
+
if (MI->allDefsAreDead()) {
LiveInterval &li = li_->getInterval(SrcReg);
if (!ShortenDeadCopySrcLiveRange(li, MI))
ShortenDeadCopyLiveRange(li, MI);
DoDelete = true;
}
- if (!DoDelete)
+ if (!DoDelete) {
+ // We need the instruction to adjust liveness, so make it a KILL.
+ if (MI->isSubregToReg()) {
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(1);
+ }
+ MI->setDesc(tii_->get(TargetOpcode::KILL));
mii = llvm::next(mii);
- else {
+ } else {
li_->RemoveMachineInstrFromMaps(MI);
mii = mbbi->erase(mii);
++numPeep;
@@ -1840,9 +1794,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
}
// If the move will be an identity move delete it
- bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
- if (MI->isIdentityCopy() ||
- (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) {
+ if (MI->isIdentityCopy()) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
if (li_->hasInterval(SrcReg)) {
LiveInterval &RegInt = li_->getInterval(SrcReg);
// If def of this move instruction is dead, remove its live range
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index e154da60affa..855bdb98b36c 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -47,7 +47,6 @@ namespace llvm {
const MachineLoopInfo* loopInfo;
AliasAnalysis *AA;
- BitVector allocatableRegs_;
DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
/// JoinedCopies - Keep track of copies eliminated due to coalescing.
@@ -64,7 +63,7 @@ namespace llvm {
public:
static char ID; // Pass identifcation, replacement for typeid
- SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {}
+ SimpleRegisterCoalescing() : MachineFunctionPass(ID) {}
struct InstrSlots {
enum {
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index e90869d600dd..b637980f885c 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -58,7 +58,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit SjLjEHPass(const TargetLowering *tli = NULL)
- : FunctionPass(&ID), TLI(tli) { }
+ : FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 7a227cf02d57..1bc148f160bc 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -40,7 +40,8 @@ namespace {
}
char SlotIndexes::ID = 0;
-static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+ "Slot index numbering", false, false);
IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
return &*IndexListEntryEmptyKey;
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 56bcb2824ae8..59d5ab33c994 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -49,29 +50,31 @@ namespace {
/// Utility class for spillers.
class SpillerBase : public Spiller {
protected:
+ MachineFunctionPass *pass;
MachineFunction *mf;
+ VirtRegMap *vrm;
LiveIntervals *lis;
MachineFrameInfo *mfi;
MachineRegisterInfo *mri;
const TargetInstrInfo *tii;
const TargetRegisterInfo *tri;
- VirtRegMap *vrm;
/// Construct a spiller base.
- SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
- : mf(mf), lis(lis), vrm(vrm)
+ SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : pass(&pass), mf(&mf), vrm(&vrm)
{
- mfi = mf->getFrameInfo();
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
+ lis = &pass.getAnalysis<LiveIntervals>();
+ mfi = mf.getFrameInfo();
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
}
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
void trivialSpillEverywhere(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals) {
+ SmallVectorImpl<LiveInterval*> &newIntervals) {
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
@@ -173,13 +176,13 @@ namespace {
class TrivialSpiller : public SpillerBase {
public:
- TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
- : SpillerBase(mf, lis, vrm) {}
+ TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : SpillerBase(pass, mf, vrm) {}
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &,
- SlotIndex*) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &) {
// Ignore spillIs - we don't use it.
trivialSpillEverywhere(li, newIntervals);
}
@@ -193,18 +196,19 @@ namespace {
class StandardSpiller : public Spiller {
protected:
LiveIntervals *lis;
- const MachineLoopInfo *loopInfo;
+ MachineLoopInfo *loopInfo;
VirtRegMap *vrm;
public:
- StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo,
- VirtRegMap *vrm)
- : lis(lis), loopInfo(loopInfo), vrm(vrm) {}
+ StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : lis(&pass.getAnalysis<LiveIntervals>()),
+ loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
+ vrm(&vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex*) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) {
std::vector<LiveInterval*> added =
lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
newIntervals.insert(newIntervals.end(), added.begin(), added.end());
@@ -221,23 +225,21 @@ namespace {
/// then the spiller falls back on the standard spilling mechanism.
class SplittingSpiller : public StandardSpiller {
public:
- SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
- const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
- : StandardSpiller(lis, loopInfo, vrm) {
-
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
+ SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : StandardSpiller(pass, mf, vrm) {
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
}
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestStart) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) {
if (worthTryingToSplit(li))
- tryVNISplit(li, earliestStart);
+ tryVNISplit(li);
else
- StandardSpiller::spill(li, newIntervals, spillIs, earliestStart);
+ StandardSpiller::spill(li, newIntervals, spillIs);
}
private:
@@ -252,8 +254,7 @@ private:
}
/// Try to break a LiveInterval into its component values.
- std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
- SlotIndex *earliestStart) {
+ std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) {
DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
@@ -277,10 +278,6 @@ private:
DEBUG(dbgs() << *splitInterval << "\n");
added.push_back(splitInterval);
alreadySplit.insert(splitInterval);
- if (earliestStart != 0) {
- if (splitInterval->beginIndex() < *earliestStart)
- *earliestStart = splitInterval->beginIndex();
- }
} else {
DEBUG(dbgs() << "0\n");
}
@@ -293,10 +290,6 @@ private:
if (!li->empty()) {
added.push_back(li);
alreadySplit.insert(li);
- if (earliestStart != 0) {
- if (li->beginIndex() < *earliestStart)
- *earliestStart = li->beginIndex();
- }
}
return added;
@@ -506,20 +499,19 @@ private:
namespace llvm {
-Spiller *createInlineSpiller(MachineFunction*,
- LiveIntervals*,
- const MachineLoopInfo*,
- VirtRegMap*);
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
}
-llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
- const MachineLoopInfo *loopInfo,
- VirtRegMap *vrm) {
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
switch (spillerOpt) {
default: assert(0 && "unknown spiller");
- case trivial: return new TrivialSpiller(mf, lis, vrm);
- case standard: return new StandardSpiller(lis, loopInfo, vrm);
- case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
- case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm);
+ case trivial: return new TrivialSpiller(pass, mf, vrm);
+ case standard: return new StandardSpiller(pass, mf, vrm);
+ case splitting: return new SplittingSpiller(pass, mf, vrm);
+ case inline_: return createInlineSpiller(pass, mf, vrm);
}
}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 450447b3933a..59bc0ec6ae70 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -11,19 +11,14 @@
#define LLVM_CODEGEN_SPILLER_H
#include "llvm/ADT/SmallVector.h"
-#include <vector>
namespace llvm {
class LiveInterval;
- class LiveIntervals;
- class LiveStacks;
class MachineFunction;
- class MachineInstr;
- class MachineLoopInfo;
+ class MachineFunctionPass;
class SlotIndex;
class VirtRegMap;
- class VNInfo;
/// Spiller interface.
///
@@ -40,18 +35,16 @@ namespace llvm {
/// @param spillIs A list of intervals that are about to be spilled,
/// and so cannot be used for remat etc.
/// @param newIntervals The newly created intervals will be appended here.
- /// @param earliestIndex The earliest point for splitting. (OK, it's another
- /// pointer to the allocator guts).
virtual void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex = 0) = 0;
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) = 0;
};
/// Create and return a spiller object, as specified on the command line.
- Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
- const MachineLoopInfo *loopInfo, VirtRegMap *vrm);
+ Spiller* createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
}
#endif
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 000000000000..29474f0d5512
--- /dev/null
+++ b/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1097 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "splitter"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+AllowSplit("spiller-splits-edges",
+ cl::desc("Allow critical edge splitting during spilling"));
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const MachineFunction &mf,
+ const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : mf_(mf),
+ lis_(lis),
+ loops_(mli),
+ tii_(*mf.getTarget().getInstrInfo()),
+ curli_(0) {}
+
+void SplitAnalysis::clear() {
+ usingInstrs_.clear();
+ usingBlocks_.clear();
+ usingLoops_.clear();
+ curli_ = 0;
+}
+
+bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
+ MachineBasicBlock *T, *F;
+ SmallVector<MachineOperand, 4> Cond;
+ return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using curli.
+void SplitAnalysis::analyzeUses() {
+ const MachineRegisterInfo &MRI = mf_.getRegInfo();
+ for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg);
+ MachineInstr *MI = I.skipInstruction();) {
+ if (MI->isDebugValue() || !usingInstrs_.insert(MI))
+ continue;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (usingBlocks_[MBB]++)
+ continue;
+ if (MachineLoop *Loop = loops_.getLoopFor(MBB))
+ usingLoops_[Loop]++;
+ }
+ DEBUG(dbgs() << " counted "
+ << usingInstrs_.size() << " instrs, "
+ << usingBlocks_.size() << " blocks, "
+ << usingLoops_.size() << " loops.\n");
+}
+
+/// removeUse - Update statistics by noting that MI no longer uses curli.
+void SplitAnalysis::removeUse(const MachineInstr *MI) {
+ if (!usingInstrs_.erase(MI))
+ return;
+
+ // Decrement MBB count.
+ const MachineBasicBlock *MBB = MI->getParent();
+ BlockCountMap::iterator bi = usingBlocks_.find(MBB);
+ assert(bi != usingBlocks_.end() && "MBB missing");
+ assert(bi->second && "0 count in map");
+ if (--bi->second)
+ return;
+ // No more uses in MBB.
+ usingBlocks_.erase(bi);
+
+ // Decrement loop count.
+ MachineLoop *Loop = loops_.getLoopFor(MBB);
+ if (!Loop)
+ return;
+ LoopCountMap::iterator li = usingLoops_.find(Loop);
+ assert(li != usingLoops_.end() && "Loop missing");
+ assert(li->second && "0 count in map");
+ if (--li->second)
+ return;
+ // No more blocks in Loop.
+ usingLoops_.erase(li);
+}
+
+// Get three sets of basic blocks surrounding a loop: Blocks inside the loop,
+// predecessor blocks, and exit blocks.
+void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) {
+ Blocks.clear();
+
+ // Blocks in the loop.
+ Blocks.Loop.insert(Loop->block_begin(), Loop->block_end());
+
+ // Predecessor blocks.
+ const MachineBasicBlock *Header = Loop->getHeader();
+ for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(),
+ E = Header->pred_end(); I != E; ++I)
+ if (!Blocks.Loop.count(*I))
+ Blocks.Preds.insert(*I);
+
+ // Exit blocks.
+ for (MachineLoop::block_iterator I = Loop->block_begin(),
+ E = Loop->block_end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if (!Blocks.Loop.count(*SI))
+ Blocks.Exits.insert(*SI);
+ }
+}
+
+/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
+/// and around the Loop.
+SplitAnalysis::LoopPeripheralUse SplitAnalysis::
+analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) {
+ LoopPeripheralUse use = ContainedInLoop;
+ for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
+ I != E; ++I) {
+ const MachineBasicBlock *MBB = I->first;
+ // Is this a peripheral block?
+ if (use < MultiPeripheral &&
+ (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) {
+ if (I->second > 1) use = MultiPeripheral;
+ else use = SinglePeripheral;
+ continue;
+ }
+ // Is it a loop block?
+ if (Blocks.Loop.count(MBB))
+ continue;
+ // It must be an unrelated block.
+ return OutsideLoop;
+ }
+ return use;
+}
+
+/// getCriticalExits - It may be necessary to partially break critical edges
+/// leaving the loop if an exit block has phi uses of curli. Collect the exit
+/// blocks that need special treatment into CriticalExits.
+void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
+ BlockPtrSet &CriticalExits) {
+ CriticalExits.clear();
+
+ // A critical exit block contains a phi def of curli, and has a predecessor
+ // that is not in the loop nor a loop predecessor.
+ // For such an exit block, the edges carrying the new variable must be moved
+ // to a new pre-exit block.
+ for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end();
+ I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ);
+ VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx);
+ // This exit may not have curli live in at all. No need to split.
+ if (!SuccVNI)
+ continue;
+ // If this is not a PHI def, it is either using a value from before the
+ // loop, or a value defined inside the loop. Both are safe.
+ if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx)
+ continue;
+ // This exit block does have a PHI. Does it also have a predecessor that is
+ // not a loop block or loop predecessor?
+ for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
+ PE = Succ->pred_end(); PI != PE; ++PI) {
+ const MachineBasicBlock *Pred = *PI;
+ if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred))
+ continue;
+ // This is a critical exit block, and we need to split the exit edge.
+ CriticalExits.insert(Succ);
+ break;
+ }
+ }
+}
+
+/// canSplitCriticalExits - Return true if it is possible to insert new exit
+/// blocks before the blocks in CriticalExits.
+bool
+SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
+ BlockPtrSet &CriticalExits) {
+ // If we don't allow critical edge splitting, require no critical exits.
+ if (!AllowSplit)
+ return CriticalExits.empty();
+
+ for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end();
+ I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // We want to insert a new pre-exit MBB before Succ, and change all the
+ // in-loop blocks to branch to the pre-exit instead of Succ.
+ // Check that all the in-loop predecessors can be changed.
+ for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
+ PE = Succ->pred_end(); PI != PE; ++PI) {
+ const MachineBasicBlock *Pred = *PI;
+ // The external predecessors won't be altered.
+ if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred))
+ continue;
+ if (!canAnalyzeBranch(Pred))
+ return false;
+ }
+
+ // If Succ's layout predecessor falls through, that too must be analyzable.
+ // We need to insert the pre-exit block in the gap.
+ MachineFunction::const_iterator MFI = Succ;
+ if (MFI == mf_.begin())
+ continue;
+ if (!canAnalyzeBranch(--MFI))
+ return false;
+ }
+ // No problems found.
+ return true;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ curli_ = li;
+ analyzeUses();
+}
+
+const MachineLoop *SplitAnalysis::getBestSplitLoop() {
+ assert(curli_ && "Call analyze() before getBestSplitLoop");
+ if (usingLoops_.empty())
+ return 0;
+
+ LoopPtrSet Loops, SecondLoops;
+ LoopBlocks Blocks;
+ BlockPtrSet CriticalExits;
+
+ // Find first-class and second class candidate loops.
+ // We prefer to split around loops where curli is used outside the periphery.
+ for (LoopCountMap::const_iterator I = usingLoops_.begin(),
+ E = usingLoops_.end(); I != E; ++I) {
+ const MachineLoop *Loop = I->first;
+ getLoopBlocks(Loop, Blocks);
+
+ // FIXME: We need an SSA updater to properly handle multiple exit blocks.
+ if (Blocks.Exits.size() > 1) {
+ DEBUG(dbgs() << " multiple exits from " << *Loop);
+ continue;
+ }
+
+ LoopPtrSet *LPS = 0;
+ switch(analyzeLoopPeripheralUse(Blocks)) {
+ case OutsideLoop:
+ LPS = &Loops;
+ break;
+ case MultiPeripheral:
+ LPS = &SecondLoops;
+ break;
+ case ContainedInLoop:
+ DEBUG(dbgs() << " contained in " << *Loop);
+ continue;
+ case SinglePeripheral:
+ DEBUG(dbgs() << " single peripheral use in " << *Loop);
+ continue;
+ }
+ // Will it be possible to split around this loop?
+ getCriticalExits(Blocks, CriticalExits);
+ DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from "
+ << *Loop);
+ if (!canSplitCriticalExits(Blocks, CriticalExits))
+ continue;
+ // This is a possible split.
+ assert(LPS);
+ LPS->insert(Loop);
+ }
+
+ DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + "
+ << SecondLoops.size() << " candidate loops.\n");
+
+ // If there are no first class loops available, look at second class loops.
+ if (Loops.empty())
+ Loops = SecondLoops;
+
+ if (Loops.empty())
+ return 0;
+
+ // Pick the earliest loop.
+ // FIXME: Are there other heuristics to consider?
+ const MachineLoop *Best = 0;
+ SlotIndex BestIdx;
+ for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E;
+ ++I) {
+ SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader());
+ if (!Best || Idx < BestIdx)
+ Best = *I, BestIdx = Idx;
+ }
+ DEBUG(dbgs() << " getBestSplitLoop found " << *Best);
+ return Best;
+}
+
+/// getMultiUseBlocks - if curli has more than one use in a basic block, it
+/// may be an advantage to split curli for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+ // If curli is local to one block, there is no point to splitting it.
+ if (usingBlocks_.size() <= 1)
+ return false;
+ // Add blocks with multiple uses.
+ for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
+ I != E; ++I)
+ switch (I->second) {
+ case 0:
+ case 1:
+ continue;
+ case 2: {
+ // It doesn't pay to split a 2-instr block if it redefines curli.
+ VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first));
+ VNInfo *VN2 =
+ curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex());
+ // live-in and live-out with a different value.
+ if (VN1 && VN2 && VN1 != VN2)
+ continue;
+ } // Fall through.
+ default:
+ Blocks.insert(I->first);
+ }
+ return !Blocks.empty();
+}
+
+//===----------------------------------------------------------------------===//
+// LiveIntervalMap
+//===----------------------------------------------------------------------===//
+
+// defValue - Introduce a li_ def for ParentVNI that could be later than
+// ParentVNI->def.
+VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+
+ // Is this a simple 1-1 mapping? Not likely.
+ if (Idx == ParentVNI->def)
+ return mapValue(ParentVNI, Idx);
+
+ // This is a complex def. Mark with a NULL in valueMap.
+ VNInfo *OldVNI =
+ valueMap_.insert(
+ ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second;
+ // The static_cast<VNInfo *> is only needed to work around a bug in an
+ // old version of the C++0x standard which the following compilers
+ // implemented and have yet to fix:
+ //
+ // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
+ // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
+ //
+ // If/When we move to C++0x, this can be replaced by nullptr.
+ (void)OldVNI;
+ assert(OldVNI == 0 && "Simple/Complex values mixed");
+
+ // Should we insert a minimal snippet of VNI LiveRange, or can we count on
+ // callers to do that? We need it for lookups of complex values.
+ VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator());
+ return VNI;
+}
+
+// mapValue - Find the mapped value for ParentVNI at Idx.
+// Potentially create phi-def values.
+VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator,bool> InsP =
+ valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0)));
+ // The static_cast<VNInfo *> is only needed to work around a bug in an
+ // old version of the C++0x standard which the following compilers
+ // implemented and have yet to fix:
+ //
+ // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
+ // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
+ //
+ // If/When we move to C++0x, this can be replaced by nullptr.
+
+ // This was an unknown value. Create a simple mapping.
+ if (InsP.second)
+ return InsP.first->second = li_.createValueCopy(ParentVNI,
+ lis_.getVNInfoAllocator());
+ // This was a simple mapped value.
+ if (InsP.first->second)
+ return InsP.first->second;
+
+ // This is a complex mapped value. There may be multiple defs, and we may need
+ // to create phi-defs.
+ MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx);
+ assert(IdxMBB && "No MBB at Idx");
+
+ // Is there a def in the same MBB we can extend?
+ if (VNInfo *VNI = extendTo(IdxMBB, Idx))
+ return VNI;
+
+ // Now for the fun part. We know that ParentVNI potentially has multiple defs,
+ // and we may need to create even more phi-defs to preserve VNInfo SSA form.
+ // Perform a depth-first search for predecessor blocks where we know the
+ // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
+
+ // Track MBBs where we have created or learned the dominating value.
+ // This may change during the DFS as we create new phi-defs.
+ typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap;
+ MBBValueMap DomValue;
+
+ for (idf_iterator<MachineBasicBlock*>
+ IDFI = idf_begin(IdxMBB),
+ IDFE = idf_end(IdxMBB); IDFI != IDFE;) {
+ MachineBasicBlock *MBB = *IDFI;
+ SlotIndex End = lis_.getMBBEndIdx(MBB);
+
+ // We are operating on the restricted CFG where ParentVNI is live.
+ if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) {
+ IDFI.skipChildren();
+ continue;
+ }
+
+ // Do we have a dominating value in this block?
+ VNInfo *VNI = extendTo(MBB, End);
+ if (!VNI) {
+ ++IDFI;
+ continue;
+ }
+
+ // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs
+ // as needed along the way.
+ for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) {
+ // Start from MBB's immediate successor. End at IdxMBB.
+ MachineBasicBlock *Succ = IDFI.getPath(PI-1);
+ std::pair<MBBValueMap::iterator, bool> InsP =
+ DomValue.insert(MBBValueMap::value_type(Succ, VNI));
+
+ // This is the first time we backtrack to Succ.
+ if (InsP.second)
+ continue;
+
+ // We reached Succ again with the same VNI. Nothing is going to change.
+ VNInfo *OVNI = InsP.first->second;
+ if (OVNI == VNI)
+ break;
+
+ // Succ already has a phi-def. No need to continue.
+ SlotIndex Start = lis_.getMBBStartIdx(Succ);
+ if (OVNI->def == Start)
+ break;
+
+ // We have a collision between the old and new VNI at Succ. That means
+ // neither dominates and we need a new phi-def.
+ VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ InsP.first->second = VNI;
+
+ // Replace OVNI with VNI in the remaining path.
+ for (; PI > 1 ; --PI) {
+ MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2));
+ if (I == DomValue.end() || I->second != OVNI)
+ break;
+ I->second = VNI;
+ }
+ }
+
+ // No need to search the children, we found a dominating value.
+ IDFI.skipChildren();
+ }
+
+ // The search should at least find a dominating value for IdxMBB.
+ assert(!DomValue.empty() && "Couldn't find a reaching definition");
+
+ // Since we went through the trouble of a full DFS visiting all reaching defs,
+ // the values in DomValue are now accurate. No more phi-defs are needed for
+ // these blocks, so we can color the live ranges.
+ // This makes the next mapValue call much faster.
+ VNInfo *IdxVNI = 0;
+ for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E;
+ ++I) {
+ MachineBasicBlock *MBB = I->first;
+ VNInfo *VNI = I->second;
+ SlotIndex Start = lis_.getMBBStartIdx(MBB);
+ if (MBB == IdxMBB) {
+ // Don't add full liveness to IdxMBB, stop at Idx.
+ if (Start != Idx)
+ li_.addRange(LiveRange(Start, Idx, VNI));
+ // The caller had better add some liveness to IdxVNI, or it leaks.
+ IdxVNI = VNI;
+ } else
+ li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+ }
+
+ assert(IdxVNI && "Didn't find value for Idx");
+ return IdxVNI;
+}
+
+// extendTo - Find the last li_ value defined in MBB at or before Idx. The
+// parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+// Return the found VNInfo, or NULL.
+VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
+ LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
+ if (I == li_.begin())
+ return 0;
+ --I;
+ if (I->start < lis_.getMBBStartIdx(MBB))
+ return 0;
+ if (I->end < Idx)
+ I->end = Idx;
+ return I->valno;
+}
+
+// addSimpleRange - Add a simple range from parentli_ to li_.
+// ParentVNI must be live in the [Start;End) interval.
+void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
+ const VNInfo *ParentVNI) {
+ VNInfo *VNI = mapValue(ParentVNI, Start);
+ // A simple mappoing is easy.
+ if (VNI->def == ParentVNI->def) {
+ li_.addRange(LiveRange(Start, End, VNI));
+ return;
+ }
+
+ // ParentVNI is a complex value. We must map per MBB.
+ MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
+ MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+
+ if (MBB == MBBE) {
+ li_.addRange(LiveRange(Start, End, VNI));
+ return;
+ }
+
+ // First block.
+ li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+
+ // Run sequence of full blocks.
+ for (++MBB; MBB != MBBE; ++MBB) {
+ Start = lis_.getMBBStartIdx(MBB);
+ li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB),
+ mapValue(ParentVNI, Start)));
+ }
+
+ // Final block.
+ Start = lis_.getMBBStartIdx(MBB);
+ if (Start != End)
+ li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
+}
+
+/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+/// All needed values whose def is not inside [Start;End) must be defined
+/// beforehand so mapValue will work.
+void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
+ LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end();
+ LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+
+ // Check if --I begins before Start and overlaps.
+ if (I != B) {
+ --I;
+ if (I->end > Start)
+ addSimpleRange(Start, std::min(End, I->end), I->valno);
+ ++I;
+ }
+
+ // The remaining ranges begin after Start.
+ for (;I != E && I->start < End; ++I)
+ addSimpleRange(I->start, std::min(End, I->end), I->valno);
+}
+
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
+ SmallVectorImpl<LiveInterval*> &intervals)
+ : sa_(sa), lis_(lis), vrm_(vrm),
+ mri_(vrm.getMachineFunction().getRegInfo()),
+ tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ curli_(sa_.getCurLI()),
+ dupli_(0), openli_(0),
+ intervals_(intervals),
+ firstInterval(intervals_.size())
+{
+ assert(curli_ && "SplitEditor created from empty SplitAnalysis");
+
+ // Make sure curli_ is assigned a stack slot, so all our intervals get the
+ // same slot as curli_.
+ if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT)
+ vrm_.assignVirt2StackSlot(curli_->reg);
+
+}
+
+LiveInterval *SplitEditor::createInterval() {
+ unsigned curli = sa_.getCurLI()->reg;
+ unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli));
+ LiveInterval &Intv = lis_.getOrCreateInterval(Reg);
+ vrm_.grow();
+ vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli));
+ return &Intv;
+}
+
+LiveInterval *SplitEditor::getDupLI() {
+ if (!dupli_) {
+ // Create an interval for dupli that is a copy of curli.
+ dupli_ = createInterval();
+ dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator());
+ }
+ return dupli_;
+}
+
+VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) {
+ VNInfo *&VNI = valueMap_[curliVNI];
+ if (!VNI)
+ VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator());
+ return VNI;
+}
+
+/// Insert a COPY instruction curli -> li. Allocate a new value from li
+/// defined by the COPY. Note that rewrite() will deal with the curli
+/// register, so this function can be used to copy from any interval - openli,
+/// curli, or dupli.
+VNInfo *SplitEditor::insertCopy(LiveInterval &LI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY),
+ LI.reg).addReg(curli_->reg);
+ SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator());
+}
+
+/// Create a new virtual register and live interval.
+void SplitEditor::openIntv() {
+ assert(!openli_ && "Previous LI not closed before openIntv");
+ openli_ = createInterval();
+ intervals_.push_back(openli_);
+ liveThrough_ = false;
+}
+
+/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
+/// not live before Idx, a COPY is not inserted.
+void SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(openli_ && "openIntv not called before enterIntvBefore");
+
+ // Copy from curli_ if it is live.
+ if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) {
+ MachineInstr *MI = lis_.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+ VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI);
+ openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI));
+
+ // Make sure CurVNI is properly mapped.
+ VNInfo *&mapVNI = valueMap_[CurVNI];
+ // We dont have SSA update yet, so only one entry per value is allowed.
+ assert(!mapVNI && "enterIntvBefore called more than once for the same value");
+ mapVNI = VNI;
+ }
+ DEBUG(dbgs() << " enterIntvBefore " << Idx << ": " << *openli_ << '\n');
+}
+
+/// enterIntvAtEnd - Enter openli at the end of MBB.
+/// PhiMBB is a successor inside openli where a PHI value is created.
+/// Currently, all entries must share the same PhiMBB.
+void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
+ assert(openli_ && "openIntv not called before enterIntvAtEnd");
+
+ SlotIndex EndA = lis_.getMBBEndIdx(&A);
+ VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
+ if (!CurVNIA) {
+ DEBUG(dbgs() << " enterIntvAtEnd, curli not live out of BB#"
+ << A.getNumber() << ".\n");
+ return;
+ }
+
+ // Add a phi kill value and live range out of A.
+ VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
+ openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
+
+ // FIXME: If this is the only entry edge, we don't need the extra PHI value.
+ // FIXME: If there are multiple entry blocks (so not a loop), we need proper
+ // SSA update.
+
+ // Now look at the start of B.
+ SlotIndex StartB = lis_.getMBBStartIdx(&B);
+ SlotIndex EndB = lis_.getMBBEndIdx(&B);
+ const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
+ if (!CurB) {
+ DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#"
+ << B.getNumber() << ".\n");
+ return;
+ }
+
+ VNInfo *VNIB = openli_->getVNInfoAt(StartB);
+ if (!VNIB) {
+ // Create a phi value.
+ VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
+ lis_.getVNInfoAllocator());
+ VNIB->setIsPHIDef(true);
+ VNInfo *&mapVNI = valueMap_[CurB->valno];
+ if (mapVNI) {
+ // Multiple copies - must create PHI value.
+ abort();
+ } else {
+ // This is the first copy of dupLR. Mark the mapping.
+ mapVNI = VNIB;
+ }
+
+ }
+
+ DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n');
+}
+
+/// useIntv - indicate that all instructions in MBB should use openli.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(openli_ && "openIntv not called before useIntv");
+
+ // Map the curli values from the interval into openli_
+ LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
+ LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+
+ if (I != B) {
+ --I;
+ // I begins before Start, but overlaps.
+ if (I->end > Start)
+ openli_->addRange(LiveRange(Start, std::min(End, I->end),
+ mapValue(I->valno)));
+ ++I;
+ }
+
+ // The remaining ranges begin after Start.
+ for (;I != E && I->start < End; ++I)
+ openli_->addRange(LiveRange(I->start, std::min(End, I->end),
+ mapValue(I->valno)));
+ DEBUG(dbgs() << " use [" << Start << ';' << End << "): " << *openli_
+ << '\n');
+}
+
+/// leaveIntvAfter - Leave openli after the instruction at Idx.
+void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(openli_ && "openIntv not called before leaveIntvAfter");
+
+ const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
+ if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": not live\n");
+ return;
+ }
+
+ // Was this value of curli live through openli?
+ if (!openli_->liveAt(CurLR->valno->def)) {
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": using external value\n");
+ liveThrough_ = true;
+ return;
+ }
+
+ // We are going to insert a back copy, so we must have a dupli_.
+ LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
+ assert(DupLR && "dupli not live into black, but curli is?");
+
+ // Insert the COPY instruction.
+ MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
+ MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
+ tii_.get(TargetOpcode::COPY), dupli_->reg)
+ .addReg(openli_->reg);
+ SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
+ mapValue(CurLR->valno)));
+ DupLR->valno->def = CopyIdx;
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+}
+
+/// leaveIntvAtTop - Leave the interval at the top of MBB.
+/// Currently, only one value can leave the interval.
+void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(openli_ && "openIntv not called before leaveIntvAtTop");
+
+ SlotIndex Start = lis_.getMBBStartIdx(&MBB);
+ const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
+
+ // Is curli even live-in to MBB?
+ if (!CurLR) {
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n");
+ return;
+ }
+
+ // Is curli defined by PHI at the beginning of MBB?
+ bool isPHIDef = CurLR->valno->isPHIDef() &&
+ CurLR->valno->def.getBaseIndex() == Start;
+
+ // If MBB is using a value of curli that was defined outside the openli range,
+ // we don't want to copy it back here.
+ if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Start
+ << ": using external value\n");
+ liveThrough_ = true;
+ return;
+ }
+
+ // We are going to insert a back copy, so we must have a dupli_.
+ LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
+ assert(DupLR && "dupli not live into black, but curli is?");
+
+ // Insert the COPY instruction.
+ MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
+ tii_.get(TargetOpcode::COPY), dupli_->reg)
+ .addReg(openli_->reg);
+ SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+
+ // Adjust dupli and openli values.
+ if (isPHIDef) {
+ // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
+ // and shift the dupli def down to the COPY.
+ VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
+ lis_.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ openli_->addRange(LiveRange(VNI->def, Idx, VNI));
+
+ dupli_->removeRange(Start, Idx);
+ DupLR->valno->def = Idx;
+ DupLR->valno->setIsPHIDef(false);
+ } else {
+ // The dupli value was defined somewhere inside the openli range.
+ DEBUG(dbgs() << " leaveIntvAtTop source value defined at "
+ << DupLR->valno->def << "\n");
+ // FIXME: We may not need a PHI here if all predecessors have the same
+ // value.
+ VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
+ lis_.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ openli_->addRange(LiveRange(VNI->def, Idx, VNI));
+
+ // FIXME: What if DupLR->valno is used by multiple exits? SSA Update.
+
+ // closeIntv is going to remove the superfluous live ranges.
+ DupLR->valno->def = Idx;
+ DupLR->valno->setIsPHIDef(false);
+ }
+
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n');
+}
+
+/// closeIntv - Indicate that we are done editing the currently open
+/// LiveInterval, and ranges can be trimmed.
+void SplitEditor::closeIntv() {
+ assert(openli_ && "openIntv not called before closeIntv");
+
+ DEBUG(dbgs() << " closeIntv cleaning up\n");
+ DEBUG(dbgs() << " open " << *openli_ << '\n');
+
+ if (liveThrough_) {
+ DEBUG(dbgs() << " value live through region, leaving dupli as is.\n");
+ } else {
+ // live out with copies inserted, or killed by region. Either way we need to
+ // remove the overlapping region from dupli.
+ getDupLI();
+ for (LiveInterval::iterator I = openli_->begin(), E = openli_->end();
+ I != E; ++I) {
+ dupli_->removeRange(I->start, I->end);
+ }
+ // FIXME: A block branching to the entry block may also branch elsewhere
+ // curli is live. We need both openli and curli to be live in that case.
+ DEBUG(dbgs() << " dup2 " << *dupli_ << '\n');
+ }
+ openli_ = 0;
+ valueMap_.clear();
+}
+
+/// rewrite - after all the new live ranges have been created, rewrite
+/// instructions using curli to use the new intervals.
+void SplitEditor::rewrite() {
+ assert(!openli_ && "Previous LI not closed before rewrite");
+ const LiveInterval *curli = sa_.getCurLI();
+ for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg),
+ RE = mri_.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ if (MI->isDebugValue()) {
+ DEBUG(dbgs() << "Zapping " << *MI);
+ // FIXME: We can do much better with debug values.
+ MO.setReg(0);
+ continue;
+ }
+ SlotIndex Idx = lis_.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ LiveInterval *LI = dupli_;
+ for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
+ LiveInterval *testli = intervals_[i];
+ if (testli->liveAt(Idx)) {
+ LI = testli;
+ break;
+ }
+ }
+ if (LI) {
+ MO.setReg(LI->reg);
+ sa_.removeUse(MI);
+ DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI);
+ }
+ }
+
+ // dupli_ goes in last, after rewriting.
+ if (dupli_) {
+ if (dupli_->empty()) {
+ DEBUG(dbgs() << " dupli became empty?\n");
+ lis_.removeInterval(dupli_->reg);
+ dupli_ = 0;
+ } else {
+ dupli_->RenumberValues(lis_);
+ intervals_.push_back(dupli_);
+ }
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_);
+ for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
+ LiveInterval &li = *intervals_[i];
+ vrai.CalculateRegClass(li.reg);
+ vrai.CalculateWeightAndHint(li);
+ DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName()
+ << ":" << li << '\n');
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Loop Splitting
+//===----------------------------------------------------------------------===//
+
+bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
+ SplitAnalysis::LoopBlocks Blocks;
+ sa_.getLoopBlocks(Loop, Blocks);
+
+ // Break critical edges as needed.
+ SplitAnalysis::BlockPtrSet CriticalExits;
+ sa_.getCriticalExits(Blocks, CriticalExits);
+ assert(CriticalExits.empty() && "Cannot break critical exits yet");
+
+ // Create new live interval for the loop.
+ openIntv();
+
+ // Insert copies in the predecessors.
+ for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(),
+ E = Blocks.Preds.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
+ enterIntvAtEnd(MBB, *Loop->getHeader());
+ }
+
+ // Switch all loop blocks.
+ for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(),
+ E = Blocks.Loop.end(); I != E; ++I)
+ useIntv(**I);
+
+ // Insert back copies in the exit blocks.
+ for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(),
+ E = Blocks.Exits.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
+ leaveIntvAtTop(MBB);
+ }
+
+ // Done.
+ closeIntv();
+ rewrite();
+ return dupli_;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// splitSingleBlocks - Split curli into a separate live interval inside each
+/// basic block in Blocks. Return true if curli has been completely replaced,
+/// false if curli is still intact, and needs to be spilled or split further.
+bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+ DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
+ // Determine the first and last instruction using curli in each block.
+ typedef std::pair<SlotIndex,SlotIndex> IndexPair;
+ typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
+ IndexPairMap MBBRange;
+ for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+ E = sa_.usingInstrs_.end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = (*I)->getParent();
+ if (!Blocks.count(MBB))
+ continue;
+ SlotIndex Idx = lis_.getInstructionIndex(*I);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
+ IndexPair &IP = MBBRange[MBB];
+ if (!IP.first.isValid() || Idx < IP.first)
+ IP.first = Idx;
+ if (!IP.second.isValid() || Idx > IP.second)
+ IP.second = Idx;
+ }
+
+ // Create a new interval for each block.
+ for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ IndexPair &IP = MBBRange[*I];
+ DEBUG(dbgs() << " splitting for BB#" << (*I)->getNumber() << ": ["
+ << IP.first << ';' << IP.second << ")\n");
+ assert(IP.first.isValid() && IP.second.isValid());
+
+ openIntv();
+ enterIntvBefore(IP.first);
+ useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
+ leaveIntvAfter(IP.second);
+ closeIntv();
+ }
+ rewrite();
+ return dupli_;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Sub Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// and it wou pay to subdivide the interval inside that block, return it.
+/// Otherwise return NULL. The returned block can be passed to
+/// SplitEditor::splitInsideBlock.
+const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
+ // The interval must be exclusive to one block.
+ if (usingBlocks_.size() != 1)
+ return 0;
+ // Don't to this for less than 4 instructions. We want to be sure that
+ // splitting actually reduces the instruction count per interval.
+ if (usingInstrs_.size() < 4)
+ return 0;
+ return usingBlocks_.begin()->first;
+}
+
+/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
+/// true if curli has been completely replaced, false if curli is still
+/// intact, and needs to be spilled or split further.
+bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+ SmallVector<SlotIndex, 32> Uses;
+ Uses.reserve(sa_.usingInstrs_.size());
+ for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+ E = sa_.usingInstrs_.end(); I != E; ++I)
+ if ((*I)->getParent() == MBB)
+ Uses.push_back(lis_.getInstructionIndex(*I));
+ DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for "
+ << Uses.size() << " instructions.\n");
+ assert(Uses.size() >= 3 && "Need at least 3 instructions");
+ array_pod_sort(Uses.begin(), Uses.end());
+
+ // Simple algorithm: Find the largest gap between uses as determined by slot
+ // indices. Create new intervals for instructions before the gap and after the
+ // gap.
+ unsigned bestPos = 0;
+ int bestGap = 0;
+ DEBUG(dbgs() << " dist (" << Uses[0]);
+ for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+ int g = Uses[i-1].distance(Uses[i]);
+ DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]);
+ if (g > bestGap)
+ bestPos = i, bestGap = g;
+ }
+ DEBUG(dbgs() << "), best: -" << bestGap << "-\n");
+
+ // bestPos points to the first use after the best gap.
+ assert(bestPos > 0 && "Invalid gap");
+
+ // FIXME: Don't create intervals for low densities.
+
+ // First interval before the gap. Don't create single-instr intervals.
+ if (bestPos > 1) {
+ openIntv();
+ enterIntvBefore(Uses.front());
+ useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
+ leaveIntvAfter(Uses[bestPos-1]);
+ closeIntv();
+ }
+
+ // Second interval after the gap.
+ if (bestPos < Uses.size()-1) {
+ openIntv();
+ enterIntvBefore(Uses[bestPos]);
+ useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
+ leaveIntvAfter(Uses.back());
+ closeIntv();
+ }
+
+ rewrite();
+ return dupli_;
+}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
new file mode 100644
index 000000000000..ddef7461dc3d
--- /dev/null
+++ b/lib/CodeGen/SplitKit.h
@@ -0,0 +1,321 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervals;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class VirtRegMap;
+class VNInfo;
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+ const MachineFunction &mf_;
+ const LiveIntervals &lis_;
+ const MachineLoopInfo &loops_;
+ const TargetInstrInfo &tii_;
+
+ // Instructions using the the current register.
+ typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
+ InstrPtrSet usingInstrs_;
+
+ // The number of instructions using curli in each basic block.
+ typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
+ BlockCountMap usingBlocks_;
+
+ // The number of basic block using curli in each loop.
+ typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
+ LoopCountMap usingLoops_;
+
+private:
+ // Current live interval.
+ const LiveInterval *curli_;
+
+ // Sumarize statistics by counting instructions using curli_.
+ void analyzeUses();
+
+ /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
+ /// analyzed.
+ bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+
+public:
+ SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set curli to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// removeUse - Update statistics by noting that mi no longer uses curli.
+ void removeUse(const MachineInstr *mi);
+
+ const LiveInterval *getCurLI() { return curli_; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+ typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
+
+ // Sets of basic blocks surrounding a machine loop.
+ struct LoopBlocks {
+ BlockPtrSet Loop; // Blocks in the loop.
+ BlockPtrSet Preds; // Loop predecessor blocks.
+ BlockPtrSet Exits; // Loop exit blocks.
+
+ void clear() {
+ Loop.clear();
+ Preds.clear();
+ Exits.clear();
+ }
+ };
+
+ // Calculate the block sets surrounding the loop.
+ void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
+
+ /// LoopPeripheralUse - how is a variable used in and around a loop?
+ /// Peripheral blocks are the loop predecessors and exit blocks.
+ enum LoopPeripheralUse {
+ ContainedInLoop, // All uses are inside the loop.
+ SinglePeripheral, // At most one instruction per peripheral block.
+ MultiPeripheral, // Multiple instructions in some peripheral blocks.
+ OutsideLoop // Uses outside loop periphery.
+ };
+
+ /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
+ /// and around the Loop.
+ LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+
+ /// getCriticalExits - It may be necessary to partially break critical edges
+ /// leaving the loop if an exit block has phi uses of curli. Collect the exit
+ /// blocks that need special treatment into CriticalExits.
+ void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+
+ /// canSplitCriticalExits - Return true if it is possible to insert new exit
+ /// blocks before the blocks in CriticalExits.
+ bool canSplitCriticalExits(const LoopBlocks &Blocks,
+ BlockPtrSet &CriticalExits);
+
+ /// getBestSplitLoop - Return the loop where curli may best be split to a
+ /// separate register, or NULL.
+ const MachineLoop *getBestSplitLoop();
+
+ /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+ /// having curli split to a new live interval. Return true if Blocks can be
+ /// passed to SplitEditor::splitSingleBlocks.
+ bool getMultiUseBlocks(BlockPtrSet &Blocks);
+
+ /// getBlockForInsideSplit - If curli is contained inside a single basic block,
+ /// and it wou pay to subdivide the interval inside that block, return it.
+ /// Otherwise return NULL. The returned block can be passed to
+ /// SplitEditor::splitInsideBlock.
+ const MachineBasicBlock *getBlockForInsideSplit();
+};
+
+
+/// LiveIntervalMap - Map values from a large LiveInterval into a small
+/// interval that is a subset. Insert phi-def values as needed. This class is
+/// used by SplitEditor to create new smaller LiveIntervals.
+///
+/// parentli_ is the larger interval, li_ is the subset interval. Every value
+/// in li_ corresponds to exactly one value in parentli_, and the live range
+/// of the value is contained within the live range of the parentli_ value.
+/// Values in parentli_ may map to any number of openli_ values, including 0.
+class LiveIntervalMap {
+ LiveIntervals &lis_;
+
+ // The parent interval is never changed.
+ const LiveInterval &parentli_;
+
+ // The child interval's values are fully contained inside parentli_ values.
+ LiveInterval &li_;
+
+ typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
+
+ // Map parentli_ values to simple values in li_ that are defined at the same
+ // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+ // Note there is a difference between values mapping to NULL (complex), and
+ // values not present (unknown/unmapped).
+ ValueMap valueMap_;
+
+ // extendTo - Find the last li_ value defined in MBB at or before Idx. The
+ // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+ // Return the found VNInfo, or NULL.
+ VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
+
+ // addSimpleRange - Add a simple range from parentli_ to li_.
+ // ParentVNI must be live in the [Start;End) interval.
+ void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+
+public:
+ LiveIntervalMap(LiveIntervals &lis,
+ const LiveInterval &parentli,
+ LiveInterval &li)
+ : lis_(lis), parentli_(parentli), li_(li) {}
+
+ /// defValue - define a value in li_ from the parentli_ value VNI and Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in parentli_.
+ /// Return the new li_ value.
+ VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+ /// assumed that ParentVNI is live at Idx.
+ /// If ParentVNI has not been defined by defValue, it is assumed that
+ /// ParentVNI->def dominates Idx.
+ /// If ParentVNI has been defined by defValue one or more times, a value that
+ /// dominates Idx will be returned. This may require creating extra phi-def
+ /// values and adding live ranges to li_.
+ VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+ /// All needed values whose def is not inside [Start;End) must be defined
+ /// beforehand so mapValue will work.
+ void addRange(SlotIndex Start, SlotIndex End);
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with exitIntv*.
+/// - Finish the current interval with closeIntv and repeat from 2.
+/// - Rewrite instructions with rewrite().
+///
+class SplitEditor {
+ SplitAnalysis &sa_;
+ LiveIntervals &lis_;
+ VirtRegMap &vrm_;
+ MachineRegisterInfo &mri_;
+ const TargetInstrInfo &tii_;
+
+ /// curli_ - The immutable interval we are currently splitting.
+ const LiveInterval *const curli_;
+
+ /// dupli_ - Created as a copy of curli_, ranges are carved out as new
+ /// intervals get added through openIntv / closeIntv. This is used to avoid
+ /// editing curli_.
+ LiveInterval *dupli_;
+
+ /// Currently open LiveInterval.
+ LiveInterval *openli_;
+
+ /// createInterval - Create a new virtual register and LiveInterval with same
+ /// register class and spill slot as curli.
+ LiveInterval *createInterval();
+
+ /// getDupLI - Ensure dupli is created and return it.
+ LiveInterval *getDupLI();
+
+ /// valueMap_ - Map values in cupli to values in openli. These are direct 1-1
+ /// mappings, and do not include values created by inserted copies.
+ DenseMap<const VNInfo*, VNInfo*> valueMap_;
+
+ /// mapValue - Return the openIntv value that corresponds to the given curli
+ /// value.
+ VNInfo *mapValue(const VNInfo *curliVNI);
+
+ /// A dupli value is live through openIntv.
+ bool liveThrough_;
+
+ /// All the new intervals created for this split are added to intervals_.
+ SmallVectorImpl<LiveInterval*> &intervals_;
+
+ /// The index into intervals_ of the first interval we added. There may be
+ /// others from before we got it.
+ unsigned firstInterval;
+
+ /// Insert a COPY instruction curli -> li. Allocate a new value from li
+ /// defined by the COPY
+ VNInfo *insertCopy(LiveInterval &LI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals will be appended to newIntervals.
+ SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+ SmallVectorImpl<LiveInterval*> &newIntervals);
+
+ /// getAnalysis - Get the corresponding analysis.
+ SplitAnalysis &getAnalysis() { return sa_; }
+
+ /// Create a new virtual register and live interval.
+ void openIntv();
+
+ /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
+ /// not live before Idx, a COPY is not inserted.
+ void enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter openli at the end of MBB.
+ /// PhiMBB is a successor inside openli where a PHI value is created.
+ /// Currently, all entries must share the same PhiMBB.
+ void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB);
+
+ /// useIntv - indicate that all instructions in MBB should use openli.
+ void useIntv(const MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in range should use openli.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave openli after the instruction at Idx.
+ void leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Currently, only one value can leave the interval.
+ void leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// closeIntv - Indicate that we are done editing the currently open
+ /// LiveInterval, and ranges can be trimmed.
+ void closeIntv();
+
+ /// rewrite - after all the new live ranges have been created, rewrite
+ /// instructions using curli to use the new intervals.
+ void rewrite();
+
+ // ===--- High level methods ---===
+
+ /// splitAroundLoop - Split curli into a separate live interval inside
+ /// the loop. Return true if curli has been completely replaced, false if
+ /// curli is still intact, and needs to be spilled or split further.
+ bool splitAroundLoop(const MachineLoop*);
+
+ /// splitSingleBlocks - Split curli into a separate live interval inside each
+ /// basic block in Blocks. Return true if curli has been completely replaced,
+ /// false if curli is still intact, and needs to be spilled or split further.
+ bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+
+ /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
+ /// true if curli has been completely replaced, false if curli is still
+ /// intact, and needs to be spilled or split further.
+ bool splitInsideBlock(const MachineBasicBlock *);
+};
+
+}
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
new file mode 100644
index 000000000000..38f3b1f4d35e
--- /dev/null
+++ b/lib/CodeGen/Splitter.cpp
@@ -0,0 +1,817 @@
+//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "SimpleRegisterCoalescing.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS(LoopSplitter, "loop-splitting",
+ "Split virtual regists across loop boundaries.", false, false);
+
+namespace llvm {
+
+ class StartSlotComparator {
+ public:
+ StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+ bool operator()(const MachineBasicBlock *mbb1,
+ const MachineBasicBlock *mbb2) const {
+ return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+ }
+ private:
+ LiveIntervals &lis;
+ };
+
+ class LoopSplit {
+ public:
+ LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+ : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Cannot split physical registers.");
+ }
+
+ LiveInterval& getLI() const { return li; }
+
+ MachineLoop& getLoop() const { return loop; }
+
+ bool isValid() const { return valid; }
+
+ bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+ void invalidate() { valid = false; }
+
+ void splitIncoming() { inSplit = true; }
+
+ void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+ void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+ void apply() {
+ assert(valid && "Attempt to apply invalid split.");
+ applyIncoming();
+ applyOutgoing();
+ copyRanges();
+ renameInside();
+ }
+
+ private:
+ LoopSplitter &ls;
+ LiveInterval &li;
+ MachineLoop &loop;
+ bool valid, inSplit;
+ std::set<MachineLoop::Edge> outSplits;
+ std::vector<MachineInstr*> loopInstrs;
+
+ LiveInterval *newLI;
+ std::map<VNInfo*, VNInfo*> vniMap;
+
+ LiveInterval* getNewLI() {
+ if (newLI == 0) {
+ const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+ unsigned vreg = ls.mri->createVirtualRegister(trc);
+ newLI = &ls.lis->getOrCreateInterval(vreg);
+ }
+ return newLI;
+ }
+
+ VNInfo* getNewVNI(VNInfo *oldVNI) {
+ VNInfo *newVNI = vniMap[oldVNI];
+
+ if (newVNI == 0) {
+ newVNI = getNewLI()->createValueCopy(oldVNI,
+ ls.lis->getVNInfoAllocator());
+ vniMap[oldVNI] = newVNI;
+ }
+
+ return newVNI;
+ }
+
+ void applyIncoming() {
+ if (!inSplit) {
+ return;
+ }
+
+ MachineBasicBlock *preHeader = loop.getLoopPreheader();
+ if (preHeader == 0) {
+ assert(ls.canInsertPreHeader(loop) &&
+ "Can't insert required preheader.");
+ preHeader = &ls.insertPreHeader(loop);
+ }
+
+ LiveRange *preHeaderRange =
+ ls.lis->findExitingRange(li, preHeader);
+ assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+ // Insert the new copy.
+ MachineInstr *copy = BuildMI(*preHeader,
+ preHeader->getFirstTerminator(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(getNewLI()->reg, RegState::Define)
+ .addReg(li.reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ VNInfo *newVal = getNewVNI(preHeaderRange->valno);
+ newVal->def = copyDefIdx;
+ newVal->setCopy(copy);
+ newVal->setIsDefAccurate(true);
+ li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
+
+ getNewLI()->addRange(LiveRange(copyDefIdx,
+ ls.lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+ void applyOutgoing() {
+
+ for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
+ osEnd = outSplits.end();
+ osItr != osEnd; ++osItr) {
+ MachineLoop::Edge edge = *osItr;
+ MachineBasicBlock *outBlock = edge.second;
+ if (ls.isCriticalEdge(edge)) {
+ assert(ls.canSplitEdge(edge) && "Unsplitable critical edge.");
+ outBlock = &ls.splitEdge(edge, loop);
+ }
+ LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
+ assert(outRange != 0 && "No exiting range?");
+
+ MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(li.reg, RegState::Define)
+ .addReg(getNewLI()->reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ // Blow away output range definition.
+ outRange->valno->def = ls.lis->getInvalidIndex();
+ outRange->valno->setIsDefAccurate(false);
+ li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+
+ VNInfo *newVal =
+ getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock),
+ true),
+ 0, false, ls.lis->getVNInfoAllocator());
+
+ getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
+ copyDefIdx, newVal));
+
+ }
+ }
+
+ void copyRange(LiveRange &lr) {
+ std::pair<bool, LoopSplitter::SlotPair> lsr =
+ ls.getLoopSubRange(lr, loop);
+
+ if (!lsr.first)
+ return;
+
+ LiveRange loopRange(lsr.second.first, lsr.second.second,
+ getNewVNI(lr.valno));
+
+ li.removeRange(loopRange.start, loopRange.end, true);
+
+ getNewLI()->addRange(loopRange);
+ }
+
+ void copyRanges() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
+ if (instr.modifiesRegister(li.reg, 0)) {
+ LiveRange *defRange =
+ li.getLiveRangeContaining(instrIdx.getDefIndex());
+ if (defRange != 0) // May have caught this already.
+ copyRange(*defRange);
+ }
+ if (instr.readsRegister(li.reg, 0)) {
+ LiveRange *useRange =
+ li.getLiveRangeContaining(instrIdx.getUseIndex());
+ if (useRange != 0) { // May have caught this already.
+ copyRange(*useRange);
+ }
+ }
+ }
+
+ for (MachineLoop::block_iterator bbItr = loop.block_begin(),
+ bbEnd = loop.block_end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock &loopBlock = **bbItr;
+ LiveRange *enteringRange =
+ ls.lis->findEnteringRange(li, &loopBlock);
+ if (enteringRange != 0) {
+ copyRange(*enteringRange);
+ }
+ }
+ }
+
+ void renameInside() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
+ MachineOperand &mop = instr.getOperand(i);
+ if (mop.isReg() && mop.getReg() == li.reg) {
+ mop.setReg(getNewLI()->reg);
+ }
+ }
+ }
+ }
+
+ };
+
+ void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addPreserved<RegisterCoalescer>();
+ au.addPreserved<CalculateSpillWeights>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ sis = &getAnalysis<SlotIndexes>();
+ lis = &getAnalysis<LiveIntervals>();
+ mli = &getAnalysis<MachineLoopInfo>();
+ mdt = &getAnalysis<MachineDominatorTree>();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
+
+ dumpOddTerminators();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+// std::deque<MachineLoop*> loops;
+// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+// dbgs() << "Loops:\n";
+// while (!loops.empty()) {
+// MachineLoop &loop = *loops.front();
+// loops.pop_front();
+// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+
+// dumpLoopInfo(loop);
+// }
+
+ //lis->dump();
+ //exit(0);
+
+ // Setup initial intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
+ !lis->intervalIsInOneMBB(*li)) {
+ intervals.push_back(li);
+ }
+ }
+
+ processIntervals();
+
+ intervals.clear();
+
+// dbgs() << "----------------------------------------\n";
+// lis->dump();
+// dbgs() << "----------------------------------------\n";
+
+ dumpOddTerminators();
+
+ //exit(1);
+
+ return false;
+ }
+
+ void LoopSplitter::releaseMemory() {
+ fqn.clear();
+ intervals.clear();
+ loopRangeMap.clear();
+ }
+
+ void LoopSplitter::dumpOddTerminators() {
+ for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock *mbb = &*bbItr;
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ if (tii->AnalyzeBranch(*mbb, a, b, c)) {
+ dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
+ dbgs() << " Terminators:\n";
+ for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr *instr= &*iItr;
+ dbgs() << " " << *instr << "";
+ }
+ dbgs() << "\n Listed successors: [ ";
+ for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock *succMBB = *sItr;
+ dbgs() << succMBB->getNumber() << " ";
+ }
+ dbgs() << "]\n\n";
+ }
+ }
+ }
+
+ void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
+ MachineBasicBlock &headerBlock = *loop.getHeader();
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ loop.getExitEdges(exitEdges);
+
+ dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
+ for (std::vector<MachineBasicBlock*>::const_iterator
+ subBlockItr = loop.getBlocks().begin(),
+ subBlockEnd = loop.getBlocks().end();
+ subBlockItr != subBlockEnd; ++subBlockItr) {
+ MachineBasicBlock &subBlock = **subBlockItr;
+ dbgs() << "BB#" << subBlock.getNumber() << " ";
+ }
+ dbgs() << "], Exit edges: [ ";
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge &exitEdge = *exitEdgeItr;
+ dbgs() << "(MBB#" << exitEdge.first->getNumber()
+ << ", MBB#" << exitEdge.second->getNumber() << ") ";
+ }
+ dbgs() << "], Sub-Loop Headers: [ ";
+ for (MachineLoop::iterator subLoopItr = loop.begin(),
+ subLoopEnd = loop.end();
+ subLoopItr != subLoopEnd; ++subLoopItr) {
+ MachineLoop &subLoop = **subLoopItr;
+ MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
+ dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
+ mbb.updateTerminator();
+
+ for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
+ miItr != miEnd; ++miItr) {
+ if (lis->isNotInMIMap(miItr)) {
+ lis->InsertMachineInstrInMaps(miItr);
+ }
+ }
+ }
+
+ bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
+ MachineBasicBlock *header = loop.getHeader();
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+
+ for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
+ pbEnd = header->pred_end();
+ pbItr != pbEnd; ++pbItr) {
+ MachineBasicBlock *predBlock = *pbItr;
+ if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) {
+ return false;
+ }
+ }
+
+ MachineFunction::iterator headerItr(header);
+ if (headerItr == mf->begin())
+ return true;
+ MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
+ assert(headerLayoutPred != 0 && "Header should have layout pred.");
+
+ return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
+ assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
+
+ MachineBasicBlock &header = *loop.getHeader();
+
+ // Save the preds - we'll need to update them once we insert the preheader.
+ typedef std::set<MachineBasicBlock*> HeaderPreds;
+ HeaderPreds headerPreds;
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (!loop.contains(*predItr))
+ headerPreds.insert(*predItr);
+ }
+
+ assert(!headerPreds.empty() && "No predecessors for header?");
+
+ //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
+
+ MachineBasicBlock *preHeader =
+ mf->CreateMachineBasicBlock(header.getBasicBlock());
+
+ assert(preHeader != 0 && "Failed to create pre-header.");
+
+ mf->insert(header, preHeader);
+
+ for (HeaderPreds::iterator hpItr = headerPreds.begin(),
+ hpEnd = headerPreds.end();
+ hpItr != hpEnd; ++hpItr) {
+ assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
+ MachineBasicBlock &hp = **hpItr;
+ hp.ReplaceUsesOfBlockWith(&header, preHeader);
+ }
+ preHeader->addSuccessor(&header);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(preHeader));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(preHeader);
+
+ if (MachineLoop *parentLoop = loop.getParentLoop()) {
+ assert(parentLoop->getHeader() != loop.getHeader() &&
+ "Parent loop has same header?");
+ parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (parentLoop != 0) {
+ loopRangeMap.erase(parentLoop);
+ parentLoop = parentLoop->getParentLoop();
+ }
+ }
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+
+ // Is this safe for physregs?
+ // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
+ if (!lis->isLiveInToMBB(li, &header))
+ continue;
+
+ if (lis->isLiveInToMBB(li, preHeader)) {
+ assert(lis->isLiveOutOfMBB(li, preHeader) &&
+ "Range terminates in newly added preheader?");
+ continue;
+ }
+
+ bool insertRange = false;
+
+ for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
+ predEnd = preHeader->pred_end();
+ predItr != predEnd; ++predItr) {
+ MachineBasicBlock *predMBB = *predItr;
+ if (lis->isLiveOutOfMBB(li, predMBB)) {
+ insertRange = true;
+ break;
+ }
+ }
+
+ if (!insertRange)
+ continue;
+
+ VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader),
+ 0, false, lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
+ lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+
+ //dbgs() << "Dumping SlotIndexes:\n";
+ //sis->dump();
+
+ //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
+
+ return *preHeader;
+ }
+
+ bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
+ assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
+ if (edge.second->pred_size() > 1)
+ return true;
+ return false;
+ }
+
+ bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
+ MachineFunction::iterator outBlockItr(edge.second);
+ if (outBlockItr == mf->begin())
+ return true;
+ MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
+ assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
+ !tii->AnalyzeBranch(*edge.first, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
+ MachineLoop &loop) {
+
+ MachineBasicBlock &inBlock = *edge.first;
+ MachineBasicBlock &outBlock = *edge.second;
+
+ assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
+ "Splitting non-critical edge?");
+
+ //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
+ // << " -> MBB#" << outBlock.getNumber() << ")...";
+
+ MachineBasicBlock *splitBlock =
+ mf->CreateMachineBasicBlock();
+
+ assert(splitBlock != 0 && "Failed to create split block.");
+
+ mf->insert(&outBlock, splitBlock);
+
+ inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
+ splitBlock->addSuccessor(&outBlock);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(splitBlock));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(splitBlock);
+
+ loopRangeMap.erase(&loop);
+
+ MachineLoop *splitParentLoop = loop.getParentLoop();
+ while (splitParentLoop != 0 &&
+ !splitParentLoop->contains(&outBlock)) {
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+
+ if (splitParentLoop != 0) {
+ assert(splitParentLoop->contains(&loop) &&
+ "Split-block parent doesn't contain original loop?");
+ splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (splitParentLoop != 0) {
+ loopRangeMap.erase(splitParentLoop);
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+ }
+
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+ bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
+ lis->isLiveInToMBB(li, &outBlock);
+ if (lis->isLiveInToMBB(li, splitBlock)) {
+ if (!intersects) {
+ li.removeRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock), true);
+ }
+ } else if (intersects) {
+ VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock),
+ 0, false, lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock),
+ newVal));
+ }
+ }
+
+ //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
+
+ return *splitBlock;
+ }
+
+ LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
+ typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
+ LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
+ if (lrItr == loopRangeMap.end()) {
+ LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
+ std::copy(loop.block_begin(), loop.block_end(),
+ std::inserter(loopMBBs, loopMBBs.begin()));
+
+ assert(!loopMBBs.empty() && "No blocks in loop?");
+
+ LoopRanges &loopRanges = loopRangeMap[&loop];
+ assert(loopRanges.empty() && "Loop encountered but not processed?");
+ SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
+ loopRanges.push_back(
+ std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
+ lis->getInvalidIndex()));
+ for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
+ curBlockEnd = loopMBBs.end();
+ curBlockItr != curBlockEnd; ++curBlockItr) {
+ SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
+ if (newStart != oldEnd) {
+ loopRanges.back().second = oldEnd;
+ loopRanges.push_back(std::make_pair(newStart,
+ lis->getInvalidIndex()));
+ }
+ oldEnd = lis->getMBBEndIdx(*curBlockItr);
+ }
+
+ loopRanges.back().second =
+ lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
+
+ return loopRanges;
+ }
+ return lrItr->second;
+ }
+
+ std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
+ const LiveRange &lr,
+ MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ LoopRanges::iterator lrItr = loopRanges.begin(),
+ lrEnd = loopRanges.end();
+ while (lrItr != lrEnd && lr.start >= lrItr->second) {
+ ++lrItr;
+ }
+
+ if (lrItr == lrEnd) {
+ SlotIndex invalid = lis->getInvalidIndex();
+ return std::make_pair(false, SlotPair(invalid, invalid));
+ }
+
+ SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
+ SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
+
+ return std::make_pair(true, SlotPair(srStart, srEnd));
+ }
+
+ void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
+ for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
+ lrItr != lrEnd; ++lrItr) {
+ dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::processHeader(LoopSplit &split) {
+ MachineBasicBlock &header = *split.getLoop().getHeader();
+ //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
+
+ if (!lis->isLiveInToMBB(split.getLI(), &header))
+ return; // Not live in, but nothing wrong so far.
+
+ MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
+ if (!preHeader) {
+
+ if (!canInsertPreHeader(split.getLoop())) {
+ split.invalidate();
+ return; // Couldn't insert a pre-header. Bail on this interval.
+ }
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
+ split.splitIncoming();
+ break;
+ }
+ }
+ } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
+ split.splitIncoming();
+ }
+ }
+
+ void LoopSplitter::processLoopExits(LoopSplit &split) {
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ split.getLoop().getExitEdges(exitEdges);
+
+ //dbgs() << " Processing loop exits:\n";
+
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge exitEdge = *exitEdgeItr;
+
+ LiveRange *outRange =
+ split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
+
+ if (outRange != 0) {
+ if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
+ split.invalidate();
+ return;
+ }
+
+ split.splitOutgoing(exitEdge);
+ }
+ }
+ }
+
+ void LoopSplitter::processLoopUses(LoopSplit &split) {
+ std::set<MachineInstr*> processed;
+
+ for (MachineRegisterInfo::reg_iterator
+ rItr = mri->reg_begin(split.getLI().reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ MachineInstr &instr = *rItr;
+ if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
+ split.addLoopInstr(&instr);
+ processed.insert(&instr);
+ }
+ }
+
+ //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
+ // << " in blocks [ ";
+ //dbgs() << "]\n";
+ }
+
+ bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Attempt to split physical register.");
+
+ LoopSplit split(*this, li, loop);
+ processHeader(split);
+ if (split.isValid())
+ processLoopExits(split);
+ if (split.isValid())
+ processLoopUses(split);
+ if (split.isValid() /* && split.isWorthwhile() */) {
+ split.apply();
+ DEBUG(dbgs() << "Success.\n");
+ return true;
+ }
+ DEBUG(dbgs() << "Failed.\n");
+ return false;
+ }
+
+ void LoopSplitter::processInterval(LiveInterval &li) {
+ std::deque<MachineLoop*> loops;
+ std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+
+ while (!loops.empty()) {
+ MachineLoop &loop = *loops.front();
+ loops.pop_front();
+ DEBUG(
+ dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
+ << loop.getHeader()->getNumber() << " ";
+ );
+ if (!splitOverLoop(li, loop)) {
+ // Couldn't split over outer loop, schedule sub-loops to be checked.
+ std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+ }
+ }
+ }
+
+ void LoopSplitter::processIntervals() {
+ while (!intervals.empty()) {
+ LiveInterval &li = *intervals.front();
+ intervals.pop_front();
+
+ assert(!lis->intervalIsInOneMBB(li) &&
+ "Single interval in process worklist.");
+
+ processInterval(li);
+ }
+ }
+
+}
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
new file mode 100644
index 000000000000..a726a7b834fb
--- /dev/null
+++ b/lib/CodeGen/Splitter.h
@@ -0,0 +1,99 @@
+//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITTER_H
+#define LLVM_CODEGEN_SPLITTER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+#include <deque>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ struct LiveRange;
+ class LoopSplit;
+ class MachineDominatorTree;
+ class MachineRegisterInfo;
+ class SlotIndexes;
+ class TargetInstrInfo;
+ class VNInfo;
+
+ class LoopSplitter : public MachineFunctionPass {
+ friend class LoopSplit;
+ public:
+ static char ID;
+
+ LoopSplitter() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+
+ private:
+
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ MachineLoopInfo *mli;
+ MachineRegisterInfo *mri;
+ MachineDominatorTree *mdt;
+ SlotIndexes *sis;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ std::string fqn;
+ std::deque<LiveInterval*> intervals;
+
+ typedef std::pair<SlotIndex, SlotIndex> SlotPair;
+ typedef std::vector<SlotPair> LoopRanges;
+ typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
+ LoopRangeMap loopRangeMap;
+
+ void dumpLoopInfo(MachineLoop &loop);
+
+ void dumpOddTerminators();
+
+ void updateTerminators(MachineBasicBlock &mbb);
+
+ bool canInsertPreHeader(MachineLoop &loop);
+ MachineBasicBlock& insertPreHeader(MachineLoop &loop);
+
+ bool isCriticalEdge(MachineLoop::Edge &edge);
+ bool canSplitEdge(MachineLoop::Edge &edge);
+ MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
+
+ LoopRanges& getLoopRanges(MachineLoop &loop);
+ std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
+ MachineLoop &loop);
+
+ void dumpLoopRanges(MachineLoop &loop);
+
+ void processHeader(LoopSplit &split);
+ void processLoopExits(LoopSplit &split);
+ void processLoopUses(LoopSplit &split);
+
+ bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
+
+ void processInterval(LiveInterval &li);
+
+ void processIntervals();
+ };
+
+}
+
+#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index ca5c28ce010c..9f51778da756 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -62,17 +62,17 @@ namespace {
bool RequiresStackProtector() const;
public:
static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(&ID), TLI(0) {}
+ StackProtector() : FunctionPass(ID), TLI(0) {}
StackProtector(const TargetLowering *tli)
- : FunctionPass(&ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {}
virtual bool runOnFunction(Function &Fn);
};
} // end anonymous namespace
char StackProtector::ID = 0;
-static RegisterPass<StackProtector>
-X("stack-protector", "Insert stack protectors");
+INITIALIZE_PASS(StackProtector, "stack-protector",
+ "Insert stack protectors", false, false);
FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
return new StackProtector(tli);
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index eff3c33e3daa..8d57ae95dde2 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -95,9 +95,9 @@ namespace {
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {}
StackSlotColoring(bool RegColor) :
- MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -119,7 +119,6 @@ namespace {
private:
void InitializeSlots();
- bool CheckForSetJmpCall(const MachineFunction &MF) const;
void ScanForSpillSlotRefs(MachineFunction &MF);
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
@@ -146,8 +145,8 @@ namespace {
char StackSlotColoring::ID = 0;
-static RegisterPass<StackSlotColoring>
-X("stack-slot-coloring", "Stack Slot Coloring");
+INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false);
FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
return new StackSlotColoring(RegColor);
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 59315cf67282..894dbfa28bac 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
namespace {
struct StrongPHIElimination : public MachineFunctionPass {
static char ID; // Pass identification, replacement for typeid
- StrongPHIElimination() : MachineFunctionPass(&ID) {}
+ StrongPHIElimination() : MachineFunctionPass(ID) {}
// Waiting stores, for each MBB, the set of copies that need to
// be inserted into that MBB
@@ -150,11 +150,10 @@ namespace {
}
char StrongPHIElimination::ID = 0;
-static RegisterPass<StrongPHIElimination>
-X("strong-phi-node-elimination",
- "Eliminate PHI nodes for register allocation, intelligently");
+INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false);
-const PassInfo *const llvm::StrongPHIEliminationID = &X;
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
/// of the given MachineFunction. These numbers are then used in other parts
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 075db803bd23..a815b364d54e 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -69,7 +69,7 @@ namespace {
public:
static char ID;
explicit TailDuplicatePass(bool PreRA) :
- MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Tail Duplication"; }
@@ -254,14 +254,15 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
// SSA form.
for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
MachineInstr *Copy = Copies[i];
- unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) {
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
- if (++UI == MRI->use_end()) {
- // Copy is the only use. Do trivial copy propagation here.
- MRI->replaceRegWith(Dst, Src);
- Copy->eraseFromParent();
- }
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
}
}
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index cdacb98e0e88..6e4a0d837ecd 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -178,19 +178,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
return MF.CloneMachineInstr(Orig);
}
-unsigned
-TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
- unsigned FnSize = 0;
- for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
- I != E; ++I)
- FnSize += GetInstSizeInBytes(I);
- }
- return FnSize;
-}
-
// If the COPY instruction in MI can be folded to a stack operation, return
// the register class to use.
static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a80cfc4b256f..f1e10eec724c 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -519,11 +519,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
ConstTextCoalSection
= getContext().getMachOSection("__TEXT", "__const_coal",
MCSectionMachO::S_COALESCED,
- SectionKind::getText());
- ConstDataCoalSection
- = getContext().getMachOSection("__DATA","__const_coal",
- MCSectionMachO::S_COALESCED,
- SectionKind::getText());
+ SectionKind::getReadOnly());
ConstDataSection // .const_data
= getContext().getMachOSection("__DATA", "__const", 0,
SectionKind::getReadOnlyWithRel());
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 564914373bb5..78989c567e42 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -138,7 +138,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -159,10 +159,10 @@ namespace {
}
char TwoAddressInstructionPass::ID = 0;
-static RegisterPass<TwoAddressInstructionPass>
-X("twoaddressinstruction", "Two-Address instruction pass");
+INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false);
-const PassInfo *const llvm::TwoAddressInstructionPassID = &X;
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
/// Sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
@@ -380,26 +380,18 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
bool &IsSrcPhys, bool &IsDstPhys) {
SrcReg = 0;
DstReg = 0;
- unsigned SrcSubIdx, DstSubIdx;
- if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (MI.isCopy()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- } else if (MI.isInsertSubreg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- } else if (MI.isSubregToReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- }
- }
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else
+ return false;
- if (DstReg) {
- IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- return true;
- }
- return false;
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
}
/// isKilled - Test if the given register value, which is used by the given
@@ -1454,7 +1446,17 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
//
// If the REG_SEQUENCE doesn't kill its source, keeping live variables
// correctly up to date becomes very difficult. Insert a copy.
- //
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after is was killed.
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ isKill = false;
+ break;
+ }
+
MachineBasicBlock::iterator InsertLoc = MI;
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 7b338126d475..6dd333358bc4 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -43,7 +43,7 @@ namespace {
virtual bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- UnreachableBlockElim() : FunctionPass(&ID) {}
+ UnreachableBlockElim() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<ProfileInfo>();
@@ -51,8 +51,8 @@ namespace {
};
}
char UnreachableBlockElim::ID = 0;
-static RegisterPass<UnreachableBlockElim>
-X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false);
FunctionPass *llvm::createUnreachableBlockEliminationPass() {
return new UnreachableBlockElim();
@@ -100,16 +100,15 @@ namespace {
MachineModuleInfo *MMI;
public:
static char ID; // Pass identification, replacement for typeid
- UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {}
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
};
}
char UnreachableMachineBlockElim::ID = 0;
-static RegisterPass<UnreachableMachineBlockElim>
-Y("unreachable-mbb-elimination",
- "Remove unreachable machine basic blocks");
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false);
-const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ed0269695dfe..20ffcffa70d3 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -48,8 +48,7 @@ STATISTIC(NumSpills , "Number of register spills");
char VirtRegMap::ID = 0;
-static RegisterPass<VirtRegMap>
-X("virtregmap", "Virtual Register Map");
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false);
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index a5599f68b64e..8b6082d18193 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -139,7 +139,7 @@ namespace llvm {
public:
static char ID;
- VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
+ VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
Virt2StackSlotMap(NO_STACK_SLOT),
Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
@@ -152,6 +152,11 @@ namespace llvm {
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunction &getMachineFunction() const {
+ assert(MF && "getMachineFunction called before runOnMAchineFunction");
+ return *MF;
+ }
+
void grow();
/// @brief returns true if the specified virtual register is
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 57a1500e6e9d..240d28cf3011 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -67,23 +67,16 @@ VirtRegRewriter::~VirtRegRewriter() {}
/// Note that operands may be added, so the MO reference is no longer valid.
static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
const TargetRegisterInfo &TRI) {
- if (unsigned SubIdx = MO.getSubReg()) {
- // Insert the physical subreg and reset the subreg field.
- MO.setReg(TRI.getSubReg(Reg, SubIdx));
- MO.setSubReg(0);
-
- // Any def, dead, and kill flags apply to the full virtual register, so they
- // also apply to the full physical register. Add imp-def/dead and imp-kill
- // as needed.
+ if (MO.getSubReg()) {
+ MO.substPhysReg(Reg, TRI);
+
+ // Any kill flags apply to the full virtual register, so they also apply to
+ // the full physical register.
+ // We assume that partial defs have already been decorated with a super-reg
+ // <imp-def> operand by LiveIntervals.
MachineInstr &MI = *MO.getParent();
- if (MO.isDef())
- if (MO.isDead())
- MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true);
- else
- MI.addRegisterDefined(Reg, &TRI);
- else if (!MO.isUndef() &&
- (MO.isKill() ||
- MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+ if (MO.isUse() && !MO.isUndef() &&
+ (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
} else {
MO.setReg(Reg);
@@ -460,7 +453,7 @@ public:
/// blocks each of which is a successor of the specified BB and has no other
/// predecessor.
static void findSinglePredSuccessor(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineBasicBlock *> &Succs) {
+ SmallVectorImpl<MachineBasicBlock *> &Succs){
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock *SuccMBB = *SI;
@@ -852,8 +845,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
// Yup, use the reload register that we didn't use before.
unsigned NewReg = Op.AssignedPhysReg;
Rejected.insert(PhysReg);
- return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected,
- RegKills, KillOps, VRM);
+ return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
} else {
// Otherwise, we might also have a problem if a previously reused
// value aliases the new register. If so, codegen the previous reload
@@ -1864,7 +1857,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
/// rewriteMBB - Keep track of which spills are available even after the
-/// register allocator is done with them. If possible, avid reloading vregs.
+/// register allocator is done with them. If possible, avoid reloading vregs.
void
LocalRewriter::RewriteMBB(LiveIntervals *LIs,
AvailableSpills &Spills, BitVector &RegKills,
@@ -1914,7 +1907,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
if (InsertSpills(MII))
NextMII = llvm::next(MII);
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
bool Erased = false;
bool BackTracked = false;
MachineInstr &MI = *MII;
@@ -2028,14 +2020,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
Spills.canClobberPhysReg(PhysReg);
}
- // If this is an asm, and PhysReg is used elsewhere as an earlyclobber
- // operand, we can't also use it as an input. (Outputs always come
- // before inputs, so we can stop looking at i.)
+ // If this is an asm, and a PhysReg alias is used elsewhere as an
+ // earlyclobber operand, we can't also use it as an input.
if (MI.isInlineAsm()) {
- for (unsigned k=0; k<i; ++k) {
+ for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
MachineOperand &MOk = MI.getOperand(k);
- if (MOk.isReg() && MOk.getReg()==PhysReg && MOk.isEarlyClobber()) {
+ if (MOk.isReg() && MOk.isEarlyClobber() &&
+ TRI->regsOverlap(MOk.getReg(), PhysReg)) {
CanReuse = false;
+ DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg << ": " << MOk << '\n');
break;
}
}
@@ -2248,15 +2242,22 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// If we have folded references to memory operands, make sure we clear all
// physical registers that may contain the value of the spilled virtual
// register
+
+ // Copy the folded virts to a small vector, we may change MI2VirtMap.
+ SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
+ // C++0x FTW!
+ for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
+ VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
+ VRM->getFoldedVirts(&MI);
+ FVRange.first != FVRange.second; ++FVRange.first)
+ FoldedVirts.push_back(FVRange.first->second);
+
SmallSet<int, 2> FoldedSS;
- for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
- unsigned VirtReg = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
+ for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
+ unsigned VirtReg = FoldedVirts[FVI].first;
+ VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR);
- // MI2VirtMap be can updated which invalidate the iterator.
- // Increment the iterator first.
- ++I;
int SS = VRM->getStackSlot(VirtReg);
if (SS == VirtRegMap::NO_STACK_SLOT)
continue;
@@ -2302,7 +2303,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
SmallVector<MachineInstr*, 4> NewMIs;
if (PhysReg &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
VRM->RemoveMachineInstrFromMaps(&MI);
@@ -2442,28 +2443,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
Spills.disallowClobberPhysReg(VirtReg);
goto ProcessNextInst;
}
- unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
- Src == Dst && SrcSR == DstSR &&
- !MI.findRegisterUseOperand(Src)->isUndef()) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
- if (MO.isDead() && !KillRegs.empty()) {
- // Source register or an implicit super/sub-register use is killed.
- assert(KillRegs[0] == Dst ||
- TRI->isSubRegister(KillRegs[0], Dst) ||
- TRI->isSuperRegister(KillRegs[0], Dst));
- // Last def is now dead.
- TransferDeadness(Src, RegKills, KillOps);
- }
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
- Erased = true;
- Spills.disallowClobberPhysReg(VirtReg);
- goto ProcessNextInst;
- }
// If it's not a no-op copy, it clobbers the value in the destreg.
Spills.ClobberPhysReg(VirtReg);
@@ -2541,20 +2520,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
UpdateKills(*LastStore, TRI, RegKills, KillOps);
goto ProcessNextInst;
}
- {
- unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
- Src == Dst && SrcSR == DstSR) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
- Erased = true;
- UpdateKills(*LastStore, TRI, RegKills, KillOps);
- goto ProcessNextInst;
- }
- }
}
}
ProcessNextInst:
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 5f30dce5855f..0be80496a3cb 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -13,6 +13,7 @@
#include "llvm/CompilerDriver/Action.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SystemUtils.h"
@@ -58,11 +59,15 @@ namespace {
if (prog.isEmpty()) {
prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main);
- if (prog.isEmpty())
- throw std::runtime_error("Can't find program '" + name + "'");
+ if (prog.isEmpty()) {
+ PrintError("Can't find program '" + name + "'");
+ return -1;
+ }
+ }
+ if (!prog.canExecute()) {
+ PrintError("Program '" + name + "' is not executable.");
+ return -1;
}
- if (!prog.canExecute())
- throw std::runtime_error("Program '" + name + "' is not executable.");
// Build the command line vector and the redirects array.
const sys::Path* redirects[3] = {0,0,0};
diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp
index d1ac8c98322c..38442038d738 100644
--- a/lib/CompilerDriver/BuiltinOptions.cpp
+++ b/lib/CompilerDriver/BuiltinOptions.cpp
@@ -19,7 +19,7 @@
namespace cl = llvm::cl;
-// External linkage here is intentional.
+namespace llvmc {
cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
cl::ZeroOrMore);
@@ -57,3 +57,5 @@ cl::opt<SaveTempsEnum::Values> SaveTemps
clEnumValN(SaveTempsEnum::Obj, "", "Same as 'cwd'"),
clEnumValEnd),
cl::ValueOptional);
+
+} // End namespace llvmc.
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index 7d1c7fe4a62b..d0c0e15bcdb7 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -25,39 +25,46 @@
#include <iterator>
#include <limits>
#include <queue>
-#include <stdexcept>
using namespace llvm;
using namespace llvmc;
namespace llvmc {
- const std::string& LanguageMap::GetLanguage(const sys::Path& File) const {
+ const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
StringRef suf = File.getSuffix();
LanguageMap::const_iterator Lang =
this->find(suf.empty() ? "*empty*" : suf);
- if (Lang == this->end())
- throw std::runtime_error("File '" + File.str() +
- "' has unknown suffix '" + suf.str() + '\'');
- return Lang->second;
+ if (Lang == this->end()) {
+ PrintError("File '" + File.str() + "' has unknown suffix '"
+ + suf.str() + '\'');
+ return 0;
+ }
+ return &Lang->second;
}
}
namespace {
- /// ChooseEdge - Return the edge with the maximum weight.
+ /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error.
template <class C>
const Edge* ChooseEdge(const C& EdgesContainer,
const InputLanguagesSet& InLangs,
const std::string& NodeName = "root") {
const Edge* MaxEdge = 0;
- unsigned MaxWeight = 0;
+ int MaxWeight = 0;
bool SingleMax = true;
+ // TODO: fix calculation of SingleMax.
for (typename C::const_iterator B = EdgesContainer.begin(),
E = EdgesContainer.end(); B != E; ++B) {
const Edge* e = B->getPtr();
- unsigned EW = e->Weight(InLangs);
+ int EW = e->Weight(InLangs);
+ if (EW < 0) {
+ // (error) invocation in TableGen -> we don't need to print an error
+ // message.
+ return 0;
+ }
if (EW > MaxWeight) {
MaxEdge = e;
MaxWeight = EW;
@@ -67,14 +74,16 @@ namespace {
}
}
- if (!SingleMax)
- throw std::runtime_error("Node " + NodeName +
- ": multiple maximal outward edges found!"
- " Most probably a specification error.");
- if (!MaxEdge)
- throw std::runtime_error("Node " + NodeName +
- ": no maximal outward edge found!"
- " Most probably a specification error.");
+ if (!SingleMax) {
+ PrintError("Node " + NodeName + ": multiple maximal outward edges found!"
+ " Most probably a specification error.");
+ return 0;
+ }
+ if (!MaxEdge) {
+ PrintError("Node " + NodeName + ": no maximal outward edge found!"
+ " Most probably a specification error.");
+ return 0;
+ }
return MaxEdge;
}
@@ -98,29 +107,34 @@ CompilationGraph::CompilationGraph() {
NodesMap["root"] = Node(this);
}
-Node& CompilationGraph::getNode(const std::string& ToolName) {
+Node* CompilationGraph::getNode(const std::string& ToolName) {
nodes_map_type::iterator I = NodesMap.find(ToolName);
- if (I == NodesMap.end())
- throw std::runtime_error("Node " + ToolName + " is not in the graph");
- return I->second;
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph");
+ return 0;
+ }
+ return &I->second;
}
-const Node& CompilationGraph::getNode(const std::string& ToolName) const {
+const Node* CompilationGraph::getNode(const std::string& ToolName) const {
nodes_map_type::const_iterator I = NodesMap.find(ToolName);
- if (I == NodesMap.end())
- throw std::runtime_error("Node " + ToolName + " is not in the graph!");
- return I->second;
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph!");
+ return 0;
+ }
+ return &I->second;
}
// Find the tools list corresponding to the given language name.
-const CompilationGraph::tools_vector_type&
+const CompilationGraph::tools_vector_type*
CompilationGraph::getToolsVector(const std::string& LangName) const
{
tools_map_type::const_iterator I = ToolsMap.find(LangName);
- if (I == ToolsMap.end())
- throw std::runtime_error("No tool corresponding to the language "
- + LangName + " found");
- return I->second;
+ if (I == ToolsMap.end()) {
+ PrintError("No tool corresponding to the language " + LangName + " found");
+ return 0;
+ }
+ return &I->second;
}
void CompilationGraph::insertNode(Tool* V) {
@@ -128,29 +142,37 @@ void CompilationGraph::insertNode(Tool* V) {
NodesMap[V->Name()] = Node(this, V);
}
-void CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
- Node& B = getNode(Edg->ToolName());
+int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
+ Node* B = getNode(Edg->ToolName());
+ if (B == 0)
+ return 1;
+
if (A == "root") {
- const char** InLangs = B.ToolPtr->InputLanguages();
+ const char** InLangs = B->ToolPtr->InputLanguages();
for (;*InLangs; ++InLangs)
ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg));
NodesMap["root"].AddEdge(Edg);
}
else {
- Node& N = getNode(A);
- N.AddEdge(Edg);
+ Node* N = getNode(A);
+ if (N == 0)
+ return 1;
+
+ N->AddEdge(Edg);
}
// Increase the inward edge counter.
- B.IncrInEdges();
+ B->IncrInEdges();
+
+ return 0;
}
// Pass input file through the chain until we bump into a Join node or
// a node that says that it is the last.
-void CompilationGraph::PassThroughGraph (const sys::Path& InFile,
- const Node* StartNode,
- const InputLanguagesSet& InLangs,
- const sys::Path& TempDir,
- const LanguageMap& LangMap) const {
+int CompilationGraph::PassThroughGraph (const sys::Path& InFile,
+ const Node* StartNode,
+ const InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) const {
sys::Path In = InFile;
const Node* CurNode = StartNode;
@@ -158,25 +180,35 @@ void CompilationGraph::PassThroughGraph (const sys::Path& InFile,
Tool* CurTool = CurNode->ToolPtr.getPtr();
if (CurTool->IsJoin()) {
- JoinTool& JT = dynamic_cast<JoinTool&>(*CurTool);
+ JoinTool& JT = static_cast<JoinTool&>(*CurTool);
JT.AddToJoinList(In);
break;
}
- Action CurAction = CurTool->GenerateAction(In, CurNode->HasChildren(),
- TempDir, InLangs, LangMap);
+ Action CurAction;
+ if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
if (int ret = CurAction.Execute())
- throw error_code(ret);
+ return ret;
if (CurAction.StopCompilation())
- return;
+ return 0;
+
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ CurNode = getNode(Edg->ToolName());
+ if (CurNode == 0)
+ return 1;
- CurNode = &getNode(ChooseEdge(CurNode->OutEdges,
- InLangs,
- CurNode->Name())->ToolName());
In = CurAction.OutFile();
}
+
+ return 0;
}
// Find the head of the toolchain corresponding to the given file.
@@ -186,26 +218,39 @@ FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
// Determine the input language.
- const std::string& InLanguage =
- ForceLanguage ? *ForceLanguage : LangMap.GetLanguage(In);
+ const std::string* InLang = LangMap.GetLanguage(In);
+ if (InLang == 0)
+ return 0;
+ const std::string& InLanguage = (ForceLanguage ? *ForceLanguage : *InLang);
// Add the current input language to the input language set.
InLangs.insert(InLanguage);
// Find the toolchain for the input language.
- const tools_vector_type& TV = getToolsVector(InLanguage);
- if (TV.empty())
- throw std::runtime_error("No toolchain corresponding to language "
- + InLanguage + " found");
- return &getNode(ChooseEdge(TV, InLangs)->ToolName());
+ const tools_vector_type* pTV = getToolsVector(InLanguage);
+ if (pTV == 0)
+ return 0;
+
+ const tools_vector_type& TV = *pTV;
+ if (TV.empty()) {
+ PrintError("No toolchain corresponding to language "
+ + InLanguage + " found");
+ return 0;
+ }
+
+ const Edge* Edg = ChooseEdge(TV, InLangs);
+ if (Edg == 0)
+ return 0;
+
+ return getNode(Edg->ToolName());
}
// Helper function used by Build().
// Traverses initial portions of the toolchains (up to the first Join node).
// This function is also responsible for handling the -x option.
-void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
- const sys::Path& TempDir,
- const LanguageMap& LangMap) {
+int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
// This is related to -x option handling.
cl::list<std::string>::const_iterator xIter = Languages.begin(),
xBegin = xIter, xEnd = Languages.end();
@@ -255,15 +300,25 @@ void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
// Find the toolchain corresponding to this file.
const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap);
+ if (N == 0)
+ return 1;
// Pass file through the chain starting at head.
- PassThroughGraph(In, N, InLangs, TempDir, LangMap);
+ if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap))
+ return ret;
}
+
+ return 0;
}
// Sort the nodes in topological order.
-void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
+int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
std::queue<const Node*> Q;
- Q.push(&getNode("root"));
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
while (!Q.empty()) {
const Node* A = Q.front();
@@ -271,12 +326,17 @@ void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
Out.push_back(A);
for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
EB != EE; ++EB) {
- Node* B = &getNode((*EB)->ToolName());
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
B->DecrInEdges();
if (B->HasNoInEdges())
Q.push(B);
}
}
+
+ return 0;
}
namespace {
@@ -287,49 +347,71 @@ namespace {
// Call TopologicalSort and filter the resulting list to include
// only Join nodes.
-void CompilationGraph::
+int CompilationGraph::
TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) {
std::vector<const Node*> TopSorted;
- TopologicalSort(TopSorted);
+ if (int ret = TopologicalSort(TopSorted))
+ return ret;
std::remove_copy_if(TopSorted.begin(), TopSorted.end(),
std::back_inserter(Out), NotJoinNode);
+
+ return 0;
}
int CompilationGraph::Build (const sys::Path& TempDir,
const LanguageMap& LangMap) {
-
InputLanguagesSet InLangs;
+ bool WasSomeActionGenerated = !InputFilenames.empty();
// Traverse initial parts of the toolchains and fill in InLangs.
- BuildInitial(InLangs, TempDir, LangMap);
+ if (int ret = BuildInitial(InLangs, TempDir, LangMap))
+ return ret;
std::vector<const Node*> JTV;
- TopologicalSortFilterJoinNodes(JTV);
+ if (int ret = TopologicalSortFilterJoinNodes(JTV))
+ return ret;
// For all join nodes in topological order:
for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end();
B != E; ++B) {
const Node* CurNode = *B;
- JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
+ JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
// Are there any files in the join list?
if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
continue;
- Action CurAction = JT->GenerateAction(CurNode->HasChildren(),
- TempDir, InLangs, LangMap);
+ WasSomeActionGenerated = true;
+ Action CurAction;
+ if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
if (int ret = CurAction.Execute())
- throw error_code(ret);
+ return ret;
if (CurAction.StopCompilation())
return 0;
- const Node* NextNode = &getNode(ChooseEdge(CurNode->OutEdges, InLangs,
- CurNode->Name())->ToolName());
- PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
- InLangs, TempDir, LangMap);
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ const Node* NextNode = getNode(Edg->ToolName());
+ if (NextNode == 0)
+ return 1;
+
+ if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
+ InLangs, TempDir, LangMap)) {
+ return ret;
+ }
+ }
+
+ if (!WasSomeActionGenerated) {
+ PrintError("no input files");
+ return 1;
}
return 0;
@@ -337,6 +419,7 @@ int CompilationGraph::Build (const sys::Path& TempDir,
int CompilationGraph::CheckLanguageNames() const {
int ret = 0;
+
// Check that names for output and input languages on all edges do match.
for (const_nodes_iterator B = this->NodesMap.begin(),
E = this->NodesMap.end(); B != E; ++B) {
@@ -345,9 +428,11 @@ int CompilationGraph::CheckLanguageNames() const {
if (N1.ToolPtr) {
for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd();
EB != EE; ++EB) {
- const Node& N2 = this->getNode((*EB)->ToolName());
+ const Node* N2 = this->getNode((*EB)->ToolName());
+ if (N2 == 0)
+ return 1;
- if (!N2.ToolPtr) {
+ if (!N2->ToolPtr) {
++ret;
errs() << "Error: there is an edge from '" << N1.ToolPtr->Name()
<< "' back to the root!\n\n";
@@ -355,7 +440,7 @@ int CompilationGraph::CheckLanguageNames() const {
}
const char* OutLang = N1.ToolPtr->OutputLanguage();
- const char** InLangs = N2.ToolPtr->InputLanguages();
+ const char** InLangs = N2->ToolPtr->InputLanguages();
bool eq = false;
for (;*InLangs; ++InLangs) {
if (std::strcmp(OutLang, *InLangs) == 0) {
@@ -367,11 +452,11 @@ int CompilationGraph::CheckLanguageNames() const {
if (!eq) {
++ret;
errs() << "Error: Output->input language mismatch in the edge '"
- << N1.ToolPtr->Name() << "' -> '" << N2.ToolPtr->Name()
+ << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name()
<< "'!\n"
<< "Expected one of { ";
- InLangs = N2.ToolPtr->InputLanguages();
+ InLangs = N2->ToolPtr->InputLanguages();
for (;*InLangs; ++InLangs) {
errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
}
@@ -395,7 +480,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
for (const_nodes_iterator B = this->NodesMap.begin(),
E = this->NodesMap.end(); B != E; ++B) {
const Node& N = B->second;
- unsigned MaxWeight = 0;
+ int MaxWeight = 0;
// Ignore the root node.
if (!N.ToolPtr)
@@ -403,7 +488,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd();
EB != EE; ++EB) {
- unsigned EdgeWeight = (*EB)->Weight(Dummy);
+ int EdgeWeight = (*EB)->Weight(Dummy);
if (EdgeWeight > MaxWeight) {
MaxWeight = EdgeWeight;
}
@@ -422,7 +507,12 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
int CompilationGraph::CheckCycles() {
unsigned deleted = 0;
std::queue<Node*> Q;
- Q.push(&getNode("root"));
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
// Try to delete all nodes that have no ingoing edges, starting from the
// root. If there are any nodes left after this operation, then we have a
@@ -434,7 +524,10 @@ int CompilationGraph::CheckCycles() {
for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
EB != EE; ++EB) {
- Node* B = &getNode((*EB)->ToolName());
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
B->DecrInEdges();
if (B->HasNoInEdges())
Q.push(B);
@@ -453,18 +546,28 @@ int CompilationGraph::CheckCycles() {
int CompilationGraph::Check () {
// We try to catch as many errors as we can in one go.
+ int errs = 0;
int ret = 0;
// Check that output/input language names match.
- ret += this->CheckLanguageNames();
+ ret = this->CheckLanguageNames();
+ if (ret < 0)
+ return 1;
+ errs += ret;
// Check for multiple default edges.
- ret += this->CheckMultipleDefaultEdges();
+ ret = this->CheckMultipleDefaultEdges();
+ if (ret < 0)
+ return 1;
+ errs += ret;
// Check for cycles.
- ret += this->CheckCycles();
+ ret = this->CheckCycles();
+ if (ret < 0)
+ return 1;
+ errs += ret;
- return ret;
+ return errs;
}
// Code related to graph visualization.
@@ -516,7 +619,7 @@ namespace llvm {
}
-void CompilationGraph::writeGraph(const std::string& OutputFilename) {
+int CompilationGraph::writeGraph(const std::string& OutputFilename) {
std::string ErrorInfo;
raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo);
@@ -526,9 +629,11 @@ void CompilationGraph::writeGraph(const std::string& OutputFilename) {
errs() << "done.\n";
}
else {
- throw std::runtime_error("Error opening file '" + OutputFilename
- + "' for writing!");
+ PrintError("Error opening file '" + OutputFilename + "' for writing!");
+ return 1;
}
+
+ return 0;
}
void CompilationGraph::viewGraph() {
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index b5e507dfc3a3..0a6613aa77a3 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -11,16 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CompilerDriver/AutoGenerated.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
#include "llvm/CompilerDriver/CompilationGraph.h"
#include "llvm/CompilerDriver/Error.h"
-#include "llvm/CompilerDriver/Plugin.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
#include <sstream>
-#include <stdexcept>
#include <string>
namespace cl = llvm::cl;
@@ -31,9 +30,9 @@ namespace {
std::stringstream* GlobalTimeLog;
- sys::Path getTempDir() {
- sys::Path tempDir;
-
+ /// GetTempDir - Get the temporary directory location. Returns non-zero value
+ /// on error.
+ int GetTempDir(sys::Path& tempDir) {
// The --temp-dir option.
if (!TempDirname.empty()) {
tempDir = TempDirname;
@@ -41,7 +40,7 @@ namespace {
// GCC 4.5-style -save-temps handling.
else if (SaveTemps == SaveTempsEnum::Unset) {
tempDir = sys::Path::GetTemporaryDirectory();
- return tempDir;
+ return 0;
}
else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
tempDir = OutputFilename;
@@ -49,35 +48,35 @@ namespace {
}
else {
// SaveTemps == Cwd --> use current dir (leave tempDir empty).
- return tempDir;
+ return 0;
}
if (!tempDir.exists()) {
std::string ErrMsg;
- if (tempDir.createDirectoryOnDisk(true, &ErrMsg))
- throw std::runtime_error(ErrMsg);
+ if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
+ PrintError(ErrMsg);
+ return 1;
+ }
}
- return tempDir;
+ return 0;
}
- /// BuildTargets - A small wrapper for CompilationGraph::Build.
+ /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns
+ /// non-zero value in case of error.
int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
int ret;
- const sys::Path& tempDir = getTempDir();
+ sys::Path tempDir;
bool toDelete = (SaveTemps == SaveTempsEnum::Unset);
- try {
- ret = graph.Build(tempDir, langMap);
- }
- catch(...) {
- if (toDelete)
- tempDir.eraseFromDisk(true);
- throw;
- }
+ if (int ret = GetTempDir(tempDir))
+ return ret;
+
+ ret = graph.Build(tempDir, langMap);
if (toDelete)
tempDir.eraseFromDisk(true);
+
return ret;
}
}
@@ -89,68 +88,58 @@ void AppendToGlobalTimeLog(const std::string& cmd, double time) {
*GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
}
-// Sometimes plugins want to condition on the value in argv[0].
+// Sometimes user code wants to access the argv[0] value.
const char* ProgramName;
int Main(int argc, char** argv) {
- try {
- LanguageMap langMap;
- CompilationGraph graph;
-
- ProgramName = argv[0];
+ int ret = 0;
+ LanguageMap langMap;
+ CompilationGraph graph;
- cl::ParseCommandLineOptions
- (argc, argv, "LLVM Compiler Driver (Work In Progress)",
- /* ReadResponseFiles = */ false);
+ ProgramName = argv[0];
- PluginLoader Plugins;
- Plugins.RunInitialization(langMap, graph);
+ cl::ParseCommandLineOptions
+ (argc, argv,
+ /* Overview = */ "LLVM Compiler Driver (Work In Progress)",
+ /* ReadResponseFiles = */ false);
- if (CheckGraph) {
- int ret = graph.Check();
- if (!ret)
- llvm::errs() << "check-graph: no errors found.\n";
+ if (int ret = autogenerated::RunInitialization(langMap, graph))
+ return ret;
- return ret;
- }
+ if (CheckGraph) {
+ ret = graph.Check();
+ if (!ret)
+ llvm::errs() << "check-graph: no errors found.\n";
- if (ViewGraph) {
- graph.viewGraph();
- if (!WriteGraph)
- return 0;
- }
+ return ret;
+ }
- if (WriteGraph) {
- graph.writeGraph(OutputFilename.empty()
- ? std::string("compilation-graph.dot")
- : OutputFilename);
+ if (ViewGraph) {
+ graph.viewGraph();
+ if (!WriteGraph)
return 0;
- }
+ }
- if (Time) {
- GlobalTimeLog = new std::stringstream;
- GlobalTimeLog->precision(2);
- }
+ if (WriteGraph) {
+ const std::string& Out = (OutputFilename.empty()
+ ? std::string("compilation-graph.dot")
+ : OutputFilename);
+ return graph.writeGraph(Out);
+ }
- int ret = BuildTargets(graph, langMap);
+ if (Time) {
+ GlobalTimeLog = new std::stringstream;
+ GlobalTimeLog->precision(2);
+ }
- if (Time) {
- llvm::errs() << GlobalTimeLog->str();
- delete GlobalTimeLog;
- }
+ ret = BuildTargets(graph, langMap);
- return ret;
- }
- catch(llvmc::error_code& ec) {
- return ec.code();
+ if (Time) {
+ llvm::errs() << GlobalTimeLog->str();
+ delete GlobalTimeLog;
}
- catch(const std::exception& ex) {
- llvm::errs() << argv[0] << ": " << ex.what() << '\n';
- }
- catch(...) {
- llvm::errs() << argv[0] << ": unknown error!\n";
- }
- return 1;
+
+ return ret;
}
} // end namespace llvmc
diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile
index 66c6d11552fc..8e8b73ca8f83 100644
--- a/lib/CompilerDriver/Makefile
+++ b/lib/CompilerDriver/Makefile
@@ -10,39 +10,11 @@
LEVEL = ../..
# We don't want this library to appear in `llvm-config --libs` output, so its
-# name doesn't start with "LLVM".
+# name doesn't start with "LLVM" and NO_LLVM_CONFIG is set.
-ifeq ($(ENABLE_LLVMC_DYNAMIC),1)
- LIBRARYNAME = libCompilerDriver
- LLVMLIBS = LLVMSupport.a LLVMSystem.a
- LOADABLE_MODULE := 1
-else
- LIBRARYNAME = CompilerDriver
- LINK_COMPONENTS = support system
-endif
+LIBRARYNAME = CompilerDriver
+LINK_COMPONENTS = support system
+NO_LLVM_CONFIG = 1
-REQUIRES_EH := 1
-REQUIRES_RTTI := 1
include $(LEVEL)/Makefile.common
-
-ifeq ($(ENABLE_LLVMC_DYNAMIC_PLUGINS), 1)
- CPP.Flags += -DENABLE_LLVMC_DYNAMIC_PLUGINS
-endif
-
-# Copy libCompilerDriver to the bin dir so that llvmc can find it.
-ifeq ($(ENABLE_LLVMC_DYNAMIC),1)
-
-FullLibName = $(LIBRARYNAME)$(SHLIBEXT)
-
-all-local:: $(ToolDir)/$(FullLibName)
-
-$(ToolDir)/$(FullLibName): $(LibDir)/$(FullLibName) $(ToolDir)/.dir
- $(Echo) Copying $(BuildMode) Shared Library $(FullLibName) to $@
- -$(Verb) $(CP) $< $@
-
-clean-local::
- $(Echo) Removing $(BuildMode) Shared Library $(FullLibName) \
- from $(ToolDir)
- -$(Verb) $(RM) -f $(ToolDir)/$(FullLibName)
-endif
diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp
deleted file mode 100644
index 0fdfef4c6a29..000000000000
--- a/lib/CompilerDriver/Plugin.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//===--- Plugin.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Plugin support.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/Plugin.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/System/Mutex.h"
-#include <algorithm>
-#include <vector>
-
-namespace {
-
- // Registry::Add<> does not do lifetime management (probably issues
- // with static constructor/destructor ordering), so we have to
- // implement it here.
- //
- // All this static registration/life-before-main model seems
- // unnecessary convoluted to me.
-
- static bool pluginListInitialized = false;
- typedef std::vector<const llvmc::BasePlugin*> PluginList;
- static PluginList Plugins;
- static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > PluginMutex;
-
- struct ByPriority {
- bool operator()(const llvmc::BasePlugin* lhs,
- const llvmc::BasePlugin* rhs) {
- return lhs->Priority() < rhs->Priority();
- }
- };
-}
-
-namespace llvmc {
-
- PluginLoader::PluginLoader() {
- llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
- if (!pluginListInitialized) {
- for (PluginRegistry::iterator B = PluginRegistry::begin(),
- E = PluginRegistry::end(); B != E; ++B)
- Plugins.push_back(B->instantiate());
- std::sort(Plugins.begin(), Plugins.end(), ByPriority());
- }
- pluginListInitialized = true;
- }
-
- PluginLoader::~PluginLoader() {
- llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
- if (pluginListInitialized) {
- for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
- B != E; ++B)
- delete (*B);
- }
- pluginListInitialized = false;
- }
-
- void PluginLoader::RunInitialization(LanguageMap& langMap,
- CompilationGraph& graph) const
- {
- llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
- for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
- B != E; ++B) {
- const BasePlugin* BP = *B;
- BP->PreprocessOptions();
- BP->PopulateLanguageMap(langMap);
- BP->PopulateCompilationGraph(graph);
- }
- }
-
-}
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index c7495d442d9c..f8f1f4a78ee5 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -236,6 +236,10 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
return 1;
}
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
+ return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
+}
+
LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
return wrap(unwrap(EE)->getTargetData());
}
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index b367033d32b5..274f816f39e1 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -89,6 +89,10 @@ static int jit_atexit(void (*Fn)()) {
return 0; // Always successful
}
+static int jit_noop() {
+ return 0;
+}
+
//===----------------------------------------------------------------------===//
//
/// getPointerToNamedFunction - This method returns the address of the specified
@@ -104,6 +108,14 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
if (Name == "exit") return (void*)(intptr_t)&jit_exit;
if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On Mingw and Cygwin, the symbol __main is resolved to
+ // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+ // (and register wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // is called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
const char *NameStr = Name.c_str();
// If this is an asm specifier, skip the sentinal.
if (NameStr[0] == 1) ++NameStr;
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 67bd3ed10ad9..63125b79c8e2 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -67,7 +67,7 @@ extern "C" void LLVMLinkInJIT() {
}
-#if defined(__GNUC__) && !defined(__ARM__EABI__)
+#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
// libgcc defines the __register_frame function to dynamically register new
// dwarf frames for exception handling. This functionality is not portable
@@ -219,10 +219,8 @@ ExecutionEngine *JIT::createJIT(Module *M,
StringRef MArch,
StringRef MCPU,
const SmallVectorImpl<std::string>& MAttrs) {
- // Make sure we can resolve symbols in the program as well. The zero arg
- // to the function tells DynamicLibrary to load the program, not a library.
- if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
- return 0;
+ // Try to register the program as a source of symbols to resolve against.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
// Pick a target either via -march or by guessing the native arch.
TargetMachine *TM = JIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
@@ -308,7 +306,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
}
// Register routine for informing unwinding runtime about new EH frames
-#if defined(__GNUC__) && !defined(__ARM_EABI__)
+#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
#if USE_KEYMGR
struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
_keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 749a57d92c94..6e11a3cd9368 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -90,8 +90,8 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
// section. This allows GDB to get a good stack trace, particularly on
// linux x86_64. Mark this as a PROGBITS section that needs to be loaded
// into memory at runtime.
- ELFSection &EH = EW.getSection(".eh_frame", ELFSection::SHT_PROGBITS,
- ELFSection::SHF_ALLOC);
+ ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
// Pointers in the DWARF EH info are all relative to the EH frame start,
// which is stored here.
EH.Addr = (uint64_t)I.EhStart;
@@ -102,9 +102,9 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
// Add this single function to the symbol table, so the debugger prints the
// name instead of '???'. We give the symbol default global visibility.
ELFSym *FnSym = ELFSym::getGV(F,
- ELFSym::STB_GLOBAL,
- ELFSym::STT_FUNC,
- ELFSym::STV_DEFAULT);
+ ELF::STB_GLOBAL,
+ ELF::STT_FUNC,
+ ELF::STV_DEFAULT);
FnSym->SectionIdx = Text.SectionIdx;
FnSym->Size = I.FnEnd - I.FnStart;
FnSym->Value = 0; // Offset from start of section.
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 4b3ca8759b8a..1105bcc0437f 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -595,443 +595,3 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
return StartEHPtr;
}
-
-unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
- JITCodeEmitter& jce,
- unsigned char* StartFunction,
- unsigned char* EndFunction) {
- const TargetMachine& TM = F.getTarget();
- TD = TM.getTargetData();
- stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
- RI = TM.getRegisterInfo();
- JCE = &jce;
- unsigned FinalSize = 0;
-
- FinalSize += GetExceptionTableSizeInBytes(&F);
-
- const std::vector<const Function *> Personalities = MMI->getPersonalities();
- FinalSize +=
- GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]);
-
- FinalSize += GetEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()],
- StartFunction);
-
- return FinalSize;
-}
-
-/// RoundUpToAlign - Add the specified alignment to FinalSize and returns
-/// the new value.
-static unsigned RoundUpToAlign(unsigned FinalSize, unsigned Alignment) {
- if (Alignment == 0) Alignment = 1;
- // Since we do not know where the buffer will be allocated, be pessimistic.
- return FinalSize + Alignment;
-}
-
-unsigned
-JITDwarfEmitter::GetEHFrameSizeInBytes(const Function* Personality,
- unsigned char* StartFunction) const {
- unsigned PointerSize = TD->getPointerSize();
- unsigned FinalSize = 0;
- // EH frame header.
- FinalSize += PointerSize;
- // FDE CIE Offset
- FinalSize += 3 * PointerSize;
- // If there is a personality and landing pads then point to the language
- // specific data area in the exception table.
- if (Personality) {
- FinalSize += MCAsmInfo::getULEB128Size(4);
- FinalSize += PointerSize;
- } else {
- FinalSize += MCAsmInfo::getULEB128Size(0);
- }
-
- // Indicate locations of function specific callee saved registers in
- // frame.
- FinalSize += GetFrameMovesSizeInBytes((intptr_t)StartFunction,
- MMI->getFrameMoves());
-
- FinalSize = RoundUpToAlign(FinalSize, 4);
-
- // Double zeroes for the unwind runtime
- FinalSize += 2 * PointerSize;
-
- return FinalSize;
-}
-
-unsigned JITDwarfEmitter::GetCommonEHFrameSizeInBytes(const Function* Personality)
- const {
-
- unsigned PointerSize = TD->getPointerSize();
- int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
- PointerSize : -PointerSize;
- unsigned FinalSize = 0;
- // EH Common Frame header
- FinalSize += PointerSize;
- FinalSize += 4;
- FinalSize += 1;
- FinalSize += Personality ? 5 : 3; // "zPLR" or "zR"
- FinalSize += MCAsmInfo::getULEB128Size(1);
- FinalSize += MCAsmInfo::getSLEB128Size(stackGrowth);
- FinalSize += 1;
-
- if (Personality) {
- FinalSize += MCAsmInfo::getULEB128Size(7);
-
- // Encoding
- FinalSize+= 1;
- //Personality
- FinalSize += PointerSize;
-
- FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
- FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
-
- } else {
- FinalSize += MCAsmInfo::getULEB128Size(1);
- FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
- }
-
- std::vector<MachineMove> Moves;
- RI->getInitialFrameState(Moves);
- FinalSize += GetFrameMovesSizeInBytes(0, Moves);
- FinalSize = RoundUpToAlign(FinalSize, 4);
- return FinalSize;
-}
-
-unsigned
-JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
- const std::vector<MachineMove> &Moves) const {
- unsigned PointerSize = TD->getPointerSize();
- int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
- PointerSize : -PointerSize;
- bool IsLocal = BaseLabelPtr;
- unsigned FinalSize = 0;
-
- for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
- const MachineMove &Move = Moves[i];
- MCSymbol *Label = Move.getLabel();
-
- // Throw out move if the label is invalid.
- if (Label && (*JCE->getLabelLocations())[Label] == 0)
- continue;
-
- intptr_t LabelPtr = 0;
- if (Label) LabelPtr = JCE->getLabelAddress(Label);
-
- const MachineLocation &Dst = Move.getDestination();
- const MachineLocation &Src = Move.getSource();
-
- // Advance row if new location.
- if (BaseLabelPtr && Label && (BaseLabelPtr != LabelPtr || !IsLocal)) {
- FinalSize++;
- FinalSize += PointerSize;
- BaseLabelPtr = LabelPtr;
- IsLocal = true;
- }
-
- // If advancing cfa.
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
- if (!Src.isReg()) {
- if (Src.getReg() == MachineLocation::VirtualFP) {
- ++FinalSize;
- } else {
- ++FinalSize;
- unsigned RegNum = RI->getDwarfRegNum(Src.getReg(), true);
- FinalSize += MCAsmInfo::getULEB128Size(RegNum);
- }
-
- int Offset = -Src.getOffset();
-
- FinalSize += MCAsmInfo::getULEB128Size(Offset);
- } else {
- llvm_unreachable("Machine move no supported yet.");
- }
- } else if (Src.isReg() &&
- Src.getReg() == MachineLocation::VirtualFP) {
- if (Dst.isReg()) {
- ++FinalSize;
- unsigned RegNum = RI->getDwarfRegNum(Dst.getReg(), true);
- FinalSize += MCAsmInfo::getULEB128Size(RegNum);
- } else {
- llvm_unreachable("Machine move no supported yet.");
- }
- } else {
- unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
- int Offset = Dst.getOffset() / stackGrowth;
-
- if (Offset < 0) {
- ++FinalSize;
- FinalSize += MCAsmInfo::getULEB128Size(Reg);
- FinalSize += MCAsmInfo::getSLEB128Size(Offset);
- } else if (Reg < 64) {
- ++FinalSize;
- FinalSize += MCAsmInfo::getULEB128Size(Offset);
- } else {
- ++FinalSize;
- FinalSize += MCAsmInfo::getULEB128Size(Reg);
- FinalSize += MCAsmInfo::getULEB128Size(Offset);
- }
- }
- }
- return FinalSize;
-}
-
-unsigned
-JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
- unsigned FinalSize = 0;
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads(JCE->getLabelLocations());
-
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- if (PadInfos.empty()) return 0;
-
- // Sort the landing pads in order of their type ids. This is used to fold
- // duplicate actions.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- LandingPads.reserve(PadInfos.size());
- for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
- LandingPads.push_back(&PadInfos[i]);
- std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
-
- // Negative type ids index into FilterIds, positive type ids index into
- // TypeInfos. The value written for a positive type id is just the type
- // id itself. For a negative type id, however, the value written is the
- // (negative) byte offset of the corresponding FilterIds entry. The byte
- // offset is usually equal to the type id, because the FilterIds entries
- // are written using a variable width encoding which outputs one byte per
- // entry as long as the value written is not too large, but can differ.
- // This kind of complication does not occur for positive type ids because
- // type infos are output using a fixed width encoding.
- // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
- SmallVector<int, 16> FilterOffsets;
- FilterOffsets.reserve(FilterIds.size());
- int Offset = -1;
- for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
- E = FilterIds.end(); I != E; ++I) {
- FilterOffsets.push_back(Offset);
- Offset -= MCAsmInfo::getULEB128Size(*I);
- }
-
- // Compute the actions table and gather the first action index for each
- // landing pad site.
- SmallVector<ActionEntry, 32> Actions;
- SmallVector<unsigned, 64> FirstActions;
- FirstActions.reserve(LandingPads.size());
-
- int FirstAction = 0;
- unsigned SizeActions = 0;
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LP = LandingPads[i];
- const std::vector<int> &TypeIds = LP->TypeIds;
- const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
- unsigned SizeSiteActions = 0;
-
- if (NumShared < TypeIds.size()) {
- unsigned SizeAction = 0;
- ActionEntry *PrevAction = 0;
-
- if (NumShared) {
- const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
- assert(Actions.size());
- PrevAction = &Actions.back();
- SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
- MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
- for (unsigned j = NumShared; j != SizePrevIds; ++j) {
- SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
- SizeAction += -PrevAction->NextAction;
- PrevAction = PrevAction->Previous;
- }
- }
-
- // Compute the actions.
- for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
- int TypeID = TypeIds[I];
- assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
- int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
- unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
-
- int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
- SizeSiteActions += SizeAction;
-
- ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
- Actions.push_back(Action);
-
- PrevAction = &Actions.back();
- }
-
- // Record the first action of the landing pad site.
- FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
- } // else identical - re-use previous FirstAction
-
- FirstActions.push_back(FirstAction);
-
- // Compute this sites contribution to size.
- SizeActions += SizeSiteActions;
- }
-
- // Compute the call-site table. Entries must be ordered by address.
- SmallVector<CallSiteEntry, 64> CallSites;
-
- RangeMapType PadMap;
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LandingPad = LandingPads[i];
- for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
- MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
- assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
- PadRange P = { i, j };
- PadMap[BeginLabel] = P;
- }
- }
-
- bool MayThrow = false;
- MCSymbol *LastLabel = 0;
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
- I != E; ++I) {
- for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
- MI != E; ++MI) {
- if (!MI->isLabel()) {
- MayThrow |= MI->getDesc().isCall();
- continue;
- }
-
- MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
-
- if (BeginLabel == LastLabel)
- MayThrow = false;
-
- RangeMapType::iterator L = PadMap.find(BeginLabel);
-
- if (L == PadMap.end())
- continue;
-
- PadRange P = L->second;
- const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
-
- assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
- "Inconsistent landing pad map!");
-
- // If some instruction between the previous try-range and this one may
- // throw, create a call-site entry with no landing pad for the region
- // between the try-ranges.
- if (MayThrow) {
- CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
- CallSites.push_back(Site);
- }
-
- LastLabel = LandingPad->EndLabels[P.RangeIndex];
- CallSiteEntry Site = {BeginLabel, LastLabel,
- LandingPad->LandingPadLabel, FirstActions[P.PadIndex]};
-
- assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
- "Invalid landing pad!");
-
- // Try to merge with the previous call-site.
- if (CallSites.size()) {
- CallSiteEntry &Prev = CallSites.back();
- if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
- // Extend the range of the previous entry.
- Prev.EndLabel = Site.EndLabel;
- continue;
- }
- }
-
- // Otherwise, create a new call-site.
- CallSites.push_back(Site);
- }
- }
- // If some instruction between the previous try-range and the end of the
- // function may throw, create a call-site entry with no landing pad for the
- // region following the try-range.
- if (MayThrow) {
- CallSiteEntry Site = {LastLabel, 0, 0, 0};
- CallSites.push_back(Site);
- }
-
- // Final tallies.
- unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
- sizeof(int32_t) + // Site length.
- sizeof(int32_t)); // Landing pad.
- for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
- SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
-
- unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
-
- unsigned TypeOffset = sizeof(int8_t) + // Call site format
- // Call-site table length
- MCAsmInfo::getULEB128Size(SizeSites) +
- SizeSites + SizeActions + SizeTypes;
-
- unsigned TotalSize = sizeof(int8_t) + // LPStart format
- sizeof(int8_t) + // TType format
- MCAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
- TypeOffset;
-
- unsigned SizeAlign = (4 - TotalSize) & 3;
-
- // Begin the exception table.
- FinalSize = RoundUpToAlign(FinalSize, 4);
- for (unsigned i = 0; i != SizeAlign; ++i) {
- ++FinalSize;
- }
-
- unsigned PointerSize = TD->getPointerSize();
-
- // Emit the header.
- ++FinalSize;
- // Asm->EOL("LPStart format (DW_EH_PE_omit)");
- ++FinalSize;
- // Asm->EOL("TType format (DW_EH_PE_absptr)");
- ++FinalSize;
- // Asm->EOL("TType base offset");
- ++FinalSize;
- // Asm->EOL("Call site format (DW_EH_PE_udata4)");
- ++FinalSize;
- // Asm->EOL("Call-site table length");
-
- // Emit the landing pad site information.
- for (unsigned i = 0; i < CallSites.size(); ++i) {
- CallSiteEntry &S = CallSites[i];
-
- // Asm->EOL("Region start");
- FinalSize += PointerSize;
-
- //Asm->EOL("Region length");
- FinalSize += PointerSize;
-
- // Asm->EOL("Landing pad");
- FinalSize += PointerSize;
-
- FinalSize += MCAsmInfo::getULEB128Size(S.Action);
- // Asm->EOL("Action");
- }
-
- // Emit the actions.
- for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
- ActionEntry &Action = Actions[I];
-
- //Asm->EOL("TypeInfo index");
- FinalSize += MCAsmInfo::getSLEB128Size(Action.ValueForTypeID);
- //Asm->EOL("Next action");
- FinalSize += MCAsmInfo::getSLEB128Size(Action.NextAction);
- }
-
- // Emit the type ids.
- for (unsigned M = TypeInfos.size(); M; --M) {
- // Asm->EOL("TypeInfo");
- FinalSize += PointerSize;
- }
-
- // Emit the filter typeids.
- for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
- unsigned TypeID = FilterIds[j];
- FinalSize += MCAsmInfo::getULEB128Size(TypeID);
- //Asm->EOL("Filter TypeInfo index");
- }
-
- FinalSize = RoundUpToAlign(FinalSize, 4);
-
- return FinalSize;
-}
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index e627550d6d0e..30956820f357 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -49,17 +49,6 @@ class JITDwarfEmitter {
unsigned char* EndFunction,
unsigned char* ExceptionTable) const;
- unsigned GetExceptionTableSizeInBytes(MachineFunction* MF) const;
-
- unsigned
- GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
- const std::vector<MachineMove> &Moves) const;
-
- unsigned GetCommonEHFrameSizeInBytes(const Function* Personality) const;
-
- unsigned GetEHFrameSizeInBytes(const Function* Personality,
- unsigned char* StartFunction) const;
-
public:
JITDwarfEmitter(JIT& jit);
@@ -71,11 +60,6 @@ public:
unsigned char* &EHFramePtr);
- unsigned GetDwarfTableSizeInBytes(MachineFunction& F,
- JITCodeEmitter& JCE,
- unsigned char* StartFunction,
- unsigned char* EndFunction);
-
void setModuleInfo(MachineModuleInfo* Info) {
MMI = Info;
}
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 28d79daed350..4c0d0789cced 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -152,16 +152,6 @@ namespace {
FunctionToCallSitesMap[F].insert(CallSite);
}
- // Returns the Function of the stub if a stub was erased, or NULL if there
- // was no stub. This function uses the call-site->function map to find a
- // relevant function, but asserts that only stubs and not other call sites
- // will be passed in.
- Function *EraseStub(const MutexGuard &locked, void *Stub);
-
- void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) {
- assert(locked.holds(TheJIT->lock));
- EraseAllCallSitesForPrelocked(F);
- }
void EraseAllCallSitesForPrelocked(Function *F);
// Erases _all_ call sites regardless of their function. This is used to
@@ -223,9 +213,6 @@ namespace {
/// specified GV address.
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
- void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
- SmallVectorImpl<void*> &Ptrs);
-
/// getGOTIndexForAddress - Return a new or existing index in the GOT for
/// an address. This function only manages slots, it does not manage the
/// contents of the slots or the memory associated with the GOT.
@@ -398,7 +385,6 @@ namespace {
/// classof - Methods for support type inquiry through isa, cast, and
/// dyn_cast:
///
- static inline bool classof(const JITEmitter*) { return true; }
static inline bool classof(const MachineCodeEmitter*) { return true; }
JITResolver &getJITResolver() { return Resolver; }
@@ -480,26 +466,10 @@ namespace {
if (DE.get()) DE->setModuleInfo(Info);
}
- void setMemoryExecutable() {
- MemMgr->setMemoryExecutable();
- }
-
- JITMemoryManager *getMemMgr() const { return MemMgr; }
-
private:
void *getPointerToGlobal(GlobalValue *GV, void *Reference,
bool MayNeedFarStub);
void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
- unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
- unsigned addSizeOfGlobalsInConstantVal(
- const Constant *C, unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist);
- unsigned addSizeOfGlobalsInInitializer(
- const Constant *Init, unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist);
- unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
};
}
@@ -507,39 +477,6 @@ void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
JRS->EraseAllCallSitesForPrelocked(F);
}
-Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) {
- CallSiteToFunctionMapTy::iterator C2F_I =
- CallSiteToFunctionMap.find(Stub);
- if (C2F_I == CallSiteToFunctionMap.end()) {
- // Not a stub.
- return NULL;
- }
-
- StubToResolverMap->UnregisterStubResolver(Stub);
-
- Function *const F = C2F_I->second;
-#ifndef NDEBUG
- void *RealStub = FunctionToLazyStubMap.lookup(F);
- assert(RealStub == Stub &&
- "Call-site that wasn't a stub passed in to EraseStub");
-#endif
- FunctionToLazyStubMap.erase(F);
- CallSiteToFunctionMap.erase(C2F_I);
-
- // Remove the stub from the function->call-sites map, and remove the whole
- // entry from the map if that was the last call site.
- FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F);
- assert(F2C_I != FunctionToCallSitesMap.end() &&
- "FunctionToCallSitesMap broken");
- bool Erased = F2C_I->second.erase(Stub);
- (void)Erased;
- assert(Erased && "FunctionToCallSitesMap broken");
- if (F2C_I->second.empty())
- FunctionToCallSitesMap.erase(F2C_I);
-
- return F;
-}
-
void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
if (F2C == FunctionToCallSitesMap.end())
@@ -690,28 +627,6 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) {
return idx;
}
-void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
- SmallVectorImpl<void*> &Ptrs) {
- MutexGuard locked(TheJIT->lock);
-
- const FunctionToLazyStubMapTy &FM = state.getFunctionToLazyStubMap(locked);
- GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
-
- for (FunctionToLazyStubMapTy::const_iterator i = FM.begin(), e = FM.end();
- i != e; ++i){
- Function *F = i->first;
- if (F->isDeclaration() && F->hasExternalLinkage()) {
- GVs.push_back(i->first);
- Ptrs.push_back(i->second);
- }
- }
- for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end();
- i != e; ++i) {
- GVs.push_back(i->first);
- Ptrs.push_back(i->second);
- }
-}
-
/// JITCompilerFn - This function is called when a lazy compilation stub has
/// been entered. It looks up which function this stub corresponds to, compiles
/// it if necessary, then returns the resultant function pointer.
@@ -831,7 +746,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
if (DL.isUnknown()) return;
if (!BeforePrintingInsn) return;
- const LLVMContext& Context = EmissionDetails.MF->getFunction()->getContext();
+ const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
if (DL.getScope(Context) != 0 && PrevDL != DL) {
JITEvent_EmittedFunctionDetails::LineStart NextLine;
@@ -859,184 +774,6 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
return Size;
}
-static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) {
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty()) return 0;
-
- unsigned NumEntries = 0;
- for (unsigned i = 0, e = JT.size(); i != e; ++i)
- NumEntries += JT[i].MBBs.size();
-
- return NumEntries * MJTI->getEntrySize(*jit->getTargetData());
-}
-
-static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
- if (Alignment == 0) Alignment = 1;
- // Since we do not know where the buffer will be allocated, be pessimistic.
- return Size + Alignment;
-}
-
-/// addSizeOfGlobal - add the size of the global (plus any alignment padding)
-/// into the running total Size.
-
-unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
- const Type *ElTy = GV->getType()->getElementType();
- size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
- size_t GVAlign =
- (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
- DEBUG(dbgs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
- DEBUG(GV->dump());
- // Assume code section ends with worst possible alignment, so first
- // variable needs maximal padding.
- if (Size==0)
- Size = 1;
- Size = ((Size+GVAlign-1)/GVAlign)*GVAlign;
- Size += GVSize;
- return Size;
-}
-
-/// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet
-/// but are referenced from the constant; put them in SeenGlobals and the
-/// Worklist, and add their size into the running total Size.
-
-unsigned JITEmitter::addSizeOfGlobalsInConstantVal(
- const Constant *C,
- unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist) {
- // If its undefined, return the garbage.
- if (isa<UndefValue>(C))
- return Size;
-
- // If the value is a ConstantExpr
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- Constant *Op0 = CE->getOperand(0);
- switch (CE->getOpcode()) {
- case Instruction::GetElementPtr:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
- break;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
- Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size,
- SeenGlobals, Worklist);
- break;
- }
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "ConstantExpr not handled: " << *CE;
- report_fatal_error(Msg.str());
- }
- }
- }
-
- if (C->getType()->getTypeID() == Type::PointerTyID)
- if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
- if (SeenGlobals.insert(GV)) {
- Worklist.push_back(GV);
- Size = addSizeOfGlobal(GV, Size);
- }
-
- return Size;
-}
-
-/// addSizeOfGLobalsInInitializer - handle any globals that we haven't seen yet
-/// but are referenced from the given initializer.
-
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(
- const Constant *Init,
- unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist) {
- if (!isa<UndefValue>(Init) &&
- !isa<ConstantVector>(Init) &&
- !isa<ConstantAggregateZero>(Init) &&
- !isa<ConstantArray>(Init) &&
- !isa<ConstantStruct>(Init) &&
- Init->getType()->isFirstClassType())
- Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist);
- return Size;
-}
-
-/// GetSizeOfGlobalsInBytes - walk the code for the function, looking for
-/// globals; then walk the initializers of those globals looking for more.
-/// If their size has not been considered yet, add it into the running total
-/// Size.
-
-unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
- unsigned Size = 0;
- SmallPtrSet<const GlobalVariable*, 8> SeenGlobals;
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const TargetInstrDesc &Desc = I->getDesc();
- const MachineInstr &MI = *I;
- unsigned NumOps = Desc.getNumOperands();
- for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) {
- const MachineOperand &MO = MI.getOperand(CurOp);
- if (MO.isGlobal()) {
- const GlobalValue* V = MO.getGlobal();
- const GlobalVariable *GV = dyn_cast<const GlobalVariable>(V);
- if (!GV)
- continue;
- // If seen in previous function, it will have an entry here.
- if (TheJIT->getPointerToGlobalIfAvailable(
- const_cast<GlobalVariable *>(GV)))
- continue;
- // If seen earlier in this function, it will have an entry here.
- // FIXME: it should be possible to combine these tables, by
- // assuming the addresses of the new globals in this module
- // start at 0 (or something) and adjusting them after codegen
- // complete. Another possibility is to grab a marker bit in GV.
- if (SeenGlobals.insert(GV))
- // A variable as yet unseen. Add in its size.
- Size = addSizeOfGlobal(GV, Size);
- }
- }
- }
- }
- DEBUG(dbgs() << "JIT: About to look through initializers\n");
- // Look for more globals that are referenced only from initializers.
- SmallVector<const GlobalVariable*, 8> Worklist(
- SeenGlobals.begin(), SeenGlobals.end());
- while (!Worklist.empty()) {
- const GlobalVariable* GV = Worklist.back();
- Worklist.pop_back();
- if (GV->hasInitializer())
- Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size,
- SeenGlobals, Worklist);
- }
-
- return Size;
-}
-
void JITEmitter::startFunction(MachineFunction &F) {
DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
<< F.getFunction()->getName() << "\n");
@@ -1044,43 +781,8 @@ void JITEmitter::startFunction(MachineFunction &F) {
uintptr_t ActualSize = 0;
// Set the memory writable, if it's not already
MemMgr->setMemoryWritable();
- if (MemMgr->NeedsExactSize()) {
- DEBUG(dbgs() << "JIT: ExactSize\n");
- const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
- MachineConstantPool *MCP = F.getConstantPool();
-
- // Ensure the constant pool/jump table info is at least 4-byte aligned.
- ActualSize = RoundUpToAlign(ActualSize, 16);
-
- // Add the alignment of the constant pool
- ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());
-
- // Add the constant pool size
- ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
-
- if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) {
- // Add the aligment of the jump table info
- ActualSize = RoundUpToAlign(ActualSize,
- MJTI->getEntryAlignment(*TheJIT->getTargetData()));
-
- // Add the jump table size
- ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT);
- }
-
- // Add the alignment for the function
- ActualSize = RoundUpToAlign(ActualSize,
- std::max(F.getFunction()->getAlignment(), 8U));
-
- // Add the function size
- ActualSize += TII->GetFunctionSizeInBytes(F);
-
- DEBUG(dbgs() << "JIT: ActualSize before globals " << ActualSize << "\n");
- // Add the size of the globals that will be allocated after this function.
- // These are all the ones referenced from this function that were not
- // previously allocated.
- ActualSize += GetSizeOfGlobalsInBytes(F);
- DEBUG(dbgs() << "JIT: ActualSize after globals " << ActualSize << "\n");
- } else if (SizeEstimate > 0) {
+
+ if (SizeEstimate > 0) {
// SizeEstimate will be non-zero on reallocation attempts.
ActualSize = SizeEstimate;
}
@@ -1268,9 +970,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
- if (MemMgr->NeedsExactSize())
- ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd);
-
BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
ActualSize);
BufferEnd = BufferBegin+ActualSize;
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 8487c83ce36a..7e8245a9e3a6 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/ADT/DenseMap.h"
using namespace llvm;
@@ -96,15 +97,6 @@ public:
return 0;
}
- /// erase - Remove the specified type, returning true if it was in the set.
- bool erase(const Type *Ty) {
- if (!TheMap.erase(Ty))
- return false;
- if (Ty->isAbstract())
- Ty->removeAbstractTypeUser(this);
- return true;
- }
-
/// insert - This returns true if the pointer was new to the set, false if it
/// was already in the set.
bool insert(const Type *Src, const Type *Dst) {
@@ -334,97 +326,6 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) {
return false;
}
-#ifndef NDEBUG
-static void PrintMap(const std::map<const Value*, Value*> &M) {
- for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end();
- I != E; ++I) {
- dbgs() << " Fr: " << (void*)I->first << " ";
- I->first->dump();
- dbgs() << " To: " << (void*)I->second << " ";
- I->second->dump();
- dbgs() << "\n";
- }
-}
-#endif
-
-
-// RemapOperand - Use ValueMap to convert constants from one module to another.
-static Value *RemapOperand(const Value *In,
- std::map<const Value*, Value*> &ValueMap) {
- std::map<const Value*,Value*>::const_iterator I = ValueMap.find(In);
- if (I != ValueMap.end())
- return I->second;
-
- // Check to see if it's a constant that we are interested in transforming.
- Value *Result = 0;
- if (const Constant *CPV = dyn_cast<Constant>(In)) {
- if ((!isa<DerivedType>(CPV->getType()) && !isa<ConstantExpr>(CPV)) ||
- isa<ConstantInt>(CPV) || isa<ConstantAggregateZero>(CPV))
- return const_cast<Constant*>(CPV); // Simple constants stay identical.
-
- if (const ConstantArray *CPA = dyn_cast<ConstantArray>(CPV)) {
- std::vector<Constant*> Operands(CPA->getNumOperands());
- for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap));
- Result = ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
- } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(CPV)) {
- std::vector<Constant*> Operands(CPS->getNumOperands());
- for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap));
- Result = ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
- } else if (isa<ConstantPointerNull>(CPV) || isa<UndefValue>(CPV)) {
- Result = const_cast<Constant*>(CPV);
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CPV)) {
- std::vector<Constant*> Operands(CP->getNumOperands());
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap));
- Result = ConstantVector::get(Operands);
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
- std::vector<Constant*> Ops;
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i),ValueMap)));
- Result = CE->getWithOperands(Ops);
- } else if (const BlockAddress *CE = dyn_cast<BlockAddress>(CPV)) {
- Result = BlockAddress::get(
- cast<Function>(RemapOperand(CE->getFunction(), ValueMap)),
- CE->getBasicBlock());
- } else {
- assert(!isa<GlobalValue>(CPV) && "Unmapped global?");
- llvm_unreachable("Unknown type of derived type constant value!");
- }
- } else if (const MDNode *MD = dyn_cast<MDNode>(In)) {
- if (MD->isFunctionLocal()) {
- SmallVector<Value*, 4> Elts;
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
- if (MD->getOperand(i))
- Elts.push_back(RemapOperand(MD->getOperand(i), ValueMap));
- else
- Elts.push_back(NULL);
- }
- Result = MDNode::get(In->getContext(), Elts.data(), MD->getNumOperands());
- } else {
- Result = const_cast<Value*>(In);
- }
- } else if (isa<MDString>(In) || isa<InlineAsm>(In) || isa<Instruction>(In)) {
- Result = const_cast<Value*>(In);
- }
-
- // Cache the mapping in our local map structure
- if (Result) {
- ValueMap[In] = Result;
- return Result;
- }
-
-#ifndef NDEBUG
- dbgs() << "LinkModules ValueMap: \n";
- PrintMap(ValueMap);
-
- dbgs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
- llvm_unreachable("Couldn't remap value!");
-#endif
- return 0;
-}
-
/// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict
/// in the symbol table. This is good for all clients except for us. Go
/// through the trouble to force this back.
@@ -541,25 +442,24 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
}
// Insert all of the named mdnoes in Src into the Dest module.
-static void LinkNamedMDNodes(Module *Dest, Module *Src) {
+static void LinkNamedMDNodes(Module *Dest, Module *Src,
+ ValueToValueMapTy &ValueMap) {
for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(),
E = Src->named_metadata_end(); I != E; ++I) {
const NamedMDNode *SrcNMD = I;
- NamedMDNode *DestNMD = Dest->getNamedMetadata(SrcNMD->getName());
- if (!DestNMD)
- NamedMDNode::Create(SrcNMD, Dest);
- else {
- // Add Src elements into Dest node.
- for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
- DestNMD->addOperand(SrcNMD->getOperand(i));
- }
+ NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
+ DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i),
+ ValueMap,
+ true)));
}
}
// LinkGlobals - Loop through the global variables in the src module and merge
// them into the dest module.
static bool LinkGlobals(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::multimap<std::string, GlobalVariable *> &AppendingVars,
std::string *Err) {
ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
@@ -735,6 +635,12 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
else if (SL == GlobalValue::LinkerPrivateLinkage &&
DL == GlobalValue::LinkerPrivateLinkage)
return GlobalValue::LinkerPrivateLinkage;
+ else if (SL == GlobalValue::LinkerPrivateWeakLinkage &&
+ DL == GlobalValue::LinkerPrivateWeakLinkage)
+ return GlobalValue::LinkerPrivateWeakLinkage;
+ else if (SL == GlobalValue::LinkerPrivateWeakDefAutoLinkage &&
+ DL == GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
else {
assert (SL == GlobalValue::PrivateLinkage &&
DL == GlobalValue::PrivateLinkage && "Unexpected linkage type");
@@ -746,7 +652,7 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
// dest module. We're assuming, that all functions/global variables were already
// linked in.
static bool LinkAlias(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
// Loop over all alias in the src module
for (Module::const_alias_iterator I = Src->alias_begin(),
@@ -757,7 +663,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
// Globals were already linked, thus we can just query ValueMap for variant
// of SAliasee in Dest.
- std::map<const Value*,Value*>::const_iterator VMI = ValueMap.find(SAliasee);
+ ValueToValueMapTy::const_iterator VMI = ValueMap.find(SAliasee);
assert(VMI != ValueMap.end() && "Aliasee not linked");
GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
GlobalValue* DGV = NULL;
@@ -888,7 +794,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
ForceRenaming(NewGA, SGA->getName());
// Remember this mapping so uses in the source module get remapped
- // later by RemapOperand.
+ // later by MapValue.
ValueMap[SGA] = NewGA;
}
@@ -899,7 +805,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
// LinkGlobalInits - Update the initializers in the Dest module now that all
// globals that may be referenced are in Dest.
static bool LinkGlobalInits(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
// Loop over all of the globals in the src module, mapping them over as we go
for (Module::const_global_iterator I = Src->global_begin(),
@@ -909,7 +815,7 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
if (SGV->hasInitializer()) { // Only process initialized GV's
// Figure out what the initializer looks like in the dest module...
Constant *SInit =
- cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap));
+ cast<Constant>(MapValue(SGV->getInitializer(), ValueMap, true));
// Grab destination global variable or alias.
GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
@@ -954,7 +860,7 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
// to the Dest function...
//
static bool LinkFunctionProtos(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
@@ -1039,7 +945,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
ForceRenaming(NewDF, SF->getName());
// Remember this mapping so uses in the source module get remapped
- // later by RemapOperand.
+ // later by MapValue.
ValueMap[SF] = NewDF;
continue;
}
@@ -1069,7 +975,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
// fix up references to values. At this point we know that Dest is an external
// function, and that Src is not.
static bool LinkFunctionBody(Function *Dest, Function *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration());
@@ -1091,12 +997,30 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
// the Source function as operands. Loop through all of the operands of the
// functions and patch them up to point to the local versions...
//
+ // This is the same as RemapInstruction, except that it avoids remapping
+ // instruction and basic block operands.
+ //
for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Remap operands.
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
- *OI = RemapOperand(*OI, ValueMap);
+ *OI = MapValue(*OI, ValueMap, true);
+
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ Value *Old = MI->second;
+ if (!isa<Instruction>(Old) && !isa<BasicBlock>(Old)) {
+ Value *New = MapValue(Old, ValueMap, true);
+ if (New != Old)
+ I->setMetadata(MI->first, cast<MDNode>(New));
+ }
+ }
+ }
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
@@ -1111,7 +1035,7 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
// source module into the DestModule. This consists basically of copying the
// function over and fixing up references to values.
static bool LinkFunctionBodies(Module *Dest, Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
// Loop over all of the functions in the src module, mapping them over as we
@@ -1319,8 +1243,10 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
return true;
// ValueMap - Mapping of values from what they used to be in Src, to what they
- // are now in Dest.
- std::map<const Value*, Value*> ValueMap;
+ // are now in Dest. ValueToValueMapTy is a ValueMap, which involves some
+ // overhead due to the use of Value handles which the Linker doesn't actually
+ // need, but this allows us to reuse the ValueMapper code.
+ ValueToValueMapTy ValueMap;
// AppendingVars - Keep track of global variables in the destination module
// with appending linkage. After the module is linked together, they are
@@ -1334,9 +1260,6 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
AppendingVars.insert(std::make_pair(I->getName(), I));
}
- // Insert all of the named mdnoes in Src into the Dest module.
- LinkNamedMDNodes(Dest, Src);
-
// Insert all of the globals in src into the Dest module... without linking
// initializers (which could refer to functions not yet mapped over).
if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
@@ -1370,6 +1293,11 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
// Resolve all uses of aliases with aliasees
if (ResolveAliases(Dest)) return true;
+ // Remap all of the named mdnoes in Src into the Dest module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ LinkNamedMDNodes(Dest, Src, ValueMap);
+
// If the source library's module id is in the dependent library list of the
// destination library, remove it since that module is now linked in.
sys::Path modId;
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index fc4f3c69482a..60a3a3e3e312 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMMC
+ ELFObjectWriter.cpp
MCAsmInfo.cpp
MCAsmInfoCOFF.cpp
MCAsmInfoDarwin.cpp
@@ -7,10 +8,12 @@ add_llvm_library(LLVMMC
MCCodeEmitter.cpp
MCContext.cpp
MCDisassembler.cpp
+ MCELFStreamer.cpp
MCExpr.cpp
MCInst.cpp
MCInstPrinter.cpp
MCLabel.cpp
+ MCDwarf.cpp
MCLoggingStreamer.cpp
MCMachOStreamer.cpp
MCNullStreamer.cpp
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
new file mode 100644
index 000000000000..cf35b45715e1
--- /dev/null
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -0,0 +1,973 @@
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/ELFObjectWriter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <vector>
+using namespace llvm;
+
+namespace {
+
+ class ELFObjectWriterImpl {
+ static bool isFixupKindX86PCRel(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return false;
+ case X86::reloc_pcrel_1byte:
+ case X86::reloc_pcrel_4byte:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return true;
+ }
+ }
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+
+ /// ELFSymbolData - Helper struct for containing some precomputed information
+ /// on symbols.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ /// @name Relocation Data
+ /// @{
+
+ struct ELFRelocationEntry {
+ // Make these big enough for both 32-bit and 64-bit
+ uint64_t r_offset;
+ uint64_t r_info;
+ uint64_t r_addend;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFRelocationEntry &RE) const {
+ return RE.r_offset < r_offset;
+ }
+ };
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ ELFObjectWriter *Writer;
+
+ raw_ostream &OS;
+
+ // This holds the current offset into the object file.
+ size_t FileOff;
+
+ unsigned Is64Bit : 1;
+
+ bool HasRelocationAddend;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+
+ unsigned ShstrtabIndex;
+
+ public:
+ ELFObjectWriterImpl(ELFObjectWriter *_Writer, bool _Is64Bit,
+ bool _HasRelAddend)
+ : Writer(_Writer), OS(Writer->getStream()), FileOff(0),
+ Is64Bit(_Is64Bit), HasRelocationAddend(_HasRelAddend) {
+ }
+
+ void Write8(uint8_t Value) { Writer->Write8(Value); }
+ void Write16(uint16_t Value) { Writer->Write16(Value); }
+ void Write32(uint32_t Value) { Writer->Write32(Value); }
+ //void Write64(uint64_t Value) { Writer->Write64(Value); }
+ void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
+ //void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+ // Writer->WriteBytes(Str, ZeroFillSize);
+ //}
+
+ void WriteWord(uint64_t W) {
+ if (Is64Bit)
+ Writer->Write64(W);
+ else
+ Writer->Write32(W);
+ }
+
+ void String8(char *buf, uint8_t Value) {
+ buf[0] = Value;
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+ void StringBE16(char *buf ,uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String16(char *buf, uint16_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ }
+
+ void String32(char *buf, uint32_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ }
+
+ void String64(char *buf, uint64_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ }
+
+ void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+ void WriteSymbolEntry(MCDataFragment *F, uint64_t name, uint8_t info,
+ uint64_t value, uint64_t size,
+ uint8_t other, uint16_t shndx);
+
+ void WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ void WriteSymbolTable(MCDataFragment *F, const MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ /// ComputeSymbolTable - Compute the symbol table data
+ ///
+ /// \param StringTable [out] - The string table data.
+ /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+ /// string table.
+ void ComputeSymbolTable(MCAssembler &Asm);
+
+ void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ WriteRelocation(Asm, Layout, *it);
+ }
+ }
+
+ void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout);
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm) {
+ // Compute symbol table information.
+ ComputeSymbolTable(Asm);
+ }
+
+ void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ void WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F,
+ const MCSectionData *SD);
+
+ void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ };
+
+}
+
+// Emit the ELF header.
+void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections) {
+ // ELF Header
+ // ----------
+ //
+ // Note
+ // ----
+ // emitWord method behaves differently for ELF32 and ELF64, writing
+ // 4 bytes in the former and 8 in the latter.
+
+ Write8(0x7f); // e_ident[EI_MAG0]
+ Write8('E'); // e_ident[EI_MAG1]
+ Write8('L'); // e_ident[EI_MAG2]
+ Write8('F'); // e_ident[EI_MAG3]
+
+ Write8(Is64Bit ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+
+ // e_ident[EI_DATA]
+ Write8(Writer->isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+
+ Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ Write8(ELF::ELFOSABI_LINUX); // e_ident[EI_OSABI]
+ Write8(0); // e_ident[EI_ABIVERSION]
+
+ WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
+
+ Write16(ELF::ET_REL); // e_type
+
+ // FIXME: Make this configurable
+ Write16(Is64Bit ? ELF::EM_X86_64 : ELF::EM_386); // e_machine = target
+
+ Write32(ELF::EV_CURRENT); // e_version
+ WriteWord(0); // e_entry, no entry point in .o file
+ WriteWord(0); // e_phoff, no program header for .o
+ WriteWord(SectionDataSize + (Is64Bit ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+
+ // FIXME: Make this configurable.
+ Write32(0); // e_flags = whatever the target wants
+
+ // e_ehsize = ELF header size
+ Write16(Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+
+ Write16(0); // e_phentsize = prog header entry size
+ Write16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ Write16(Is64Bit ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+
+ // e_shnum = # of section header ents
+ Write16(NumberOfSections);
+
+ // e_shstrndx = Section # of '.shstrtab'
+ Write16(ShstrtabIndex);
+}
+
+void ELFObjectWriterImpl::WriteSymbolEntry(MCDataFragment *F, uint64_t name,
+ uint8_t info, uint64_t value,
+ uint64_t size, uint8_t other,
+ uint16_t shndx) {
+ if (Is64Bit) {
+ char buf[8];
+
+ String32(buf, name);
+ F->getContents() += StringRef(buf, 4); // st_name
+
+ String8(buf, info);
+ F->getContents() += StringRef(buf, 1); // st_info
+
+ String8(buf, other);
+ F->getContents() += StringRef(buf, 1); // st_other
+
+ String16(buf, shndx);
+ F->getContents() += StringRef(buf, 2); // st_shndx
+
+ String64(buf, value);
+ F->getContents() += StringRef(buf, 8); // st_value
+
+ String64(buf, size);
+ F->getContents() += StringRef(buf, 8); // st_size
+ } else {
+ char buf[4];
+
+ String32(buf, name);
+ F->getContents() += StringRef(buf, 4); // st_name
+
+ String32(buf, value);
+ F->getContents() += StringRef(buf, 4); // st_value
+
+ String32(buf, size);
+ F->getContents() += StringRef(buf, 4); // st_size
+
+ String8(buf, info);
+ F->getContents() += StringRef(buf, 1); // st_info
+
+ String8(buf, other);
+ F->getContents() += StringRef(buf, 1); // st_other
+
+ String16(buf, shndx);
+ F->getContents() += StringRef(buf, 2); // st_shndx
+ }
+}
+
+void ELFObjectWriterImpl::WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &Data = *MSD.SymbolData;
+ uint8_t Info = (Data.getFlags() & 0xff);
+ uint8_t Other = ((Data.getFlags() & 0xf00) >> ELF_STV_Shift);
+ uint64_t Value = 0;
+ uint64_t Size = 0;
+ const MCExpr *ESize;
+
+ if (Data.isCommon() && Data.isExternal())
+ Value = Data.getCommonAlignment();
+
+ if (!Data.isCommon())
+ if (MCFragment *FF = Data.getFragment())
+ Value = Layout.getSymbolAddress(&Data) -
+ Layout.getSectionAddress(FF->getParent());
+
+ ESize = Data.getSize();
+ if (Data.getSize()) {
+ MCValue Res;
+ if (ESize->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(ESize);
+
+ if (BE->EvaluateAsRelocatable(Res, &Layout)) {
+ MCSymbolData &A =
+ Layout.getAssembler().getSymbolData(Res.getSymA()->getSymbol());
+ MCSymbolData &B =
+ Layout.getAssembler().getSymbolData(Res.getSymB()->getSymbol());
+
+ Size = Layout.getSymbolAddress(&A) - Layout.getSymbolAddress(&B);
+ }
+ } else if (ESize->getKind() == MCExpr::Constant) {
+ Size = static_cast<const MCConstantExpr *>(ESize)->getValue();
+ } else {
+ assert(0 && "Unsupported size expression");
+ }
+ }
+
+ // Write out the symbol table entry
+ WriteSymbolEntry(F, MSD.StringIndex, Info, Value,
+ Size, Other, MSD.SectionIndex);
+}
+
+void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // The string table must be emitted first because we need the index
+ // into the string table for all the symbol names.
+ assert(StringTable.size() && "Missing string table");
+
+ // FIXME: Make sure the start of the symbol table is aligned.
+
+ // The first entry is the undefined symbol entry.
+ unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+ F->getContents().append(EntrySize, '\x00');
+
+ // Write the symbol table entries.
+ LastLocalSymbolIndex = LocalSymbolData.size() + 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = LocalSymbolData[i];
+ WriteSymbol(F, MSD, Layout);
+ }
+
+ // Write out a symbol table entry for each section.
+ // leaving out the just added .symtab which is at
+ // the very end
+ unsigned Index = 1;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ // Leave out relocations so we don't have indexes within
+ // the relocations messed up
+ if (Section.getType() == ELF::SHT_RELA || Section.getType() == ELF::SHT_REL)
+ continue;
+ if (Index == Asm.size())
+ continue;
+ WriteSymbolEntry(F, 0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT, Index);
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = ExternalSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ assert((Data.getFlags() & ELF_STB_Global) &&
+ "External symbol requires STB_GLOBAL flag");
+ WriteSymbol(F, MSD, Layout);
+ if (Data.getFlags() & ELF_STB_Local)
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = UndefinedSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ Data.setFlags(Data.getFlags() | ELF_STB_Global);
+ WriteSymbol(F, MSD, Layout);
+ if (Data.getFlags() & ELF_STB_Local)
+ LastLocalSymbolIndex++;
+ }
+}
+
+// FIXME: this is currently X86/X86_64 only
+void ELFObjectWriterImpl::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ int64_t Addend = 0;
+ unsigned Index = 0;
+ int64_t Value = Target.getConstant();
+
+ if (!Target.isAbsolute()) {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
+ MCFragment *F = SD.getFragment();
+
+ if (Base) {
+ if (F && (!Symbol->isInSection() || SD.isCommon()) && !SD.isExternal()) {
+ Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
+ Value += Layout.getSymbolAddress(&SD);
+ } else
+ Index = getSymbolIndexInSymbolTable(Asm, Symbol);
+ if (Base != &SD)
+ Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
+ Addend = Value;
+ // Compensate for the addend on i386.
+ if (Is64Bit)
+ Value = 0;
+ } else {
+ if (F) {
+ // Index of the section in .symtab against this symbol
+ // is being relocated + 2 (empty section + abs. symbols).
+ Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
+
+ MCSectionData *FSD = F->getParent();
+ // Offset of the symbol in the section
+ Addend = Layout.getSymbolAddress(&SD) - Layout.getSectionAddress(FSD);
+ } else {
+ FixedValue = Value;
+ return;
+ }
+ }
+ }
+
+ FixedValue = Value;
+
+ // determine the type of the relocation
+ bool IsPCRel = isFixupKindX86PCRel(Fixup.getKind());
+ unsigned Type;
+ if (Is64Bit) {
+ if (IsPCRel) {
+ Type = ELF::R_X86_64_PC32;
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_pcrel_4byte:
+ case FK_Data_4:
+ // check that the offset fits within a signed long
+ if (isInt<32>(Target.getConstant()))
+ Type = ELF::R_X86_64_32S;
+ else
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case X86::reloc_pcrel_1byte:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ Type = ELF::R_386_PC32;
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case X86::reloc_pcrel_4byte:
+ case FK_Data_4: Type = ELF::R_386_32; break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case X86::reloc_pcrel_1byte:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ ELFRelocationEntry ERE;
+
+ if (Is64Bit) {
+ struct ELF::Elf64_Rela ERE64;
+ ERE64.setSymbolAndType(Index, Type);
+ ERE.r_info = ERE64.r_info;
+ } else {
+ struct ELF::Elf32_Rela ERE32;
+ ERE32.setSymbolAndType(Index, Type);
+ ERE.r_info = ERE32.r_info;
+ }
+
+ ERE.r_offset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
+ if (HasRelocationAddend)
+ ERE.r_addend = Addend;
+ else
+ ERE.r_addend = 0; // Silence compiler warning.
+
+ Relocations[Fragment->getParent()].push_back(ERE);
+}
+
+uint64_t
+ELFObjectWriterImpl::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S) {
+ MCSymbolData &SD = Asm.getSymbolData(*S);
+
+ // Local symbol.
+ if (!SD.isExternal() && !S->isUndefined())
+ return SD.getIndex() + /* empty symbol */ 1;
+
+ // External or undefined symbol.
+ return SD.getIndex() + Asm.size() + /* empty symbol */ 1;
+}
+
+void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // Add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(Symbol))
+ continue;
+
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ LocalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Now add non-local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(Symbol))
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = ELF::SHN_UNDEF;
+ // XXX: for some reason we dont Emit* this
+ it->setFlags(it->getFlags() | ELF_STB_Global);
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ ExternalSymbolData.push_back(MSD);
+ } else if (it->isCommon()) {
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Symbols are required to be in lexicographic order.
+ array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
+ array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices. Local symbols must come before all other
+ // symbols with non-local bindings.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+}
+
+void ELFObjectWriterImpl::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (!Relocations[&SD].empty()) {
+ MCContext &Ctx = Asm.getContext();
+ const MCSection *RelaSection;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const StringRef SectionName = Section.getSectionName();
+ std::string RelaSectionName = HasRelocationAddend ? ".rela" : ".rel";
+ RelaSectionName += SectionName;
+
+ unsigned EntrySize;
+ if (HasRelocationAddend)
+ EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ else
+ EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+
+ RelaSection = Ctx.getELFSection(RelaSectionName, HasRelocationAddend ?
+ ELF::SHT_RELA : ELF::SHT_REL, 0,
+ SectionKind::getReadOnly(),
+ false, EntrySize);
+
+ MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
+ RelaSD.setAlignment(1);
+
+ MCDataFragment *F = new MCDataFragment(&RelaSD);
+
+ WriteRelocationsFragment(Asm, F, &SD);
+
+ Asm.AddSectionToTheEnd(RelaSD, Layout);
+ }
+}
+
+void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+ uint64_t Flags, uint64_t Address,
+ uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info,
+ uint64_t Alignment,
+ uint64_t EntrySize) {
+ Write32(Name); // sh_name: index into string table
+ Write32(Type); // sh_type
+ WriteWord(Flags); // sh_flags
+ WriteWord(Address); // sh_addr
+ WriteWord(Offset); // sh_offset
+ WriteWord(Size); // sh_size
+ Write32(Link); // sh_link
+ Write32(Info); // sh_info
+ WriteWord(Alignment); // sh_addralign
+ WriteWord(EntrySize); // sh_entsize
+}
+
+void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD) {
+ std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
+ // sort by the r_offset just like gnu as does
+ array_pod_sort(Relocs.begin(), Relocs.end());
+
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ ELFRelocationEntry entry = Relocs[e - i - 1];
+
+ unsigned WordSize = Is64Bit ? 8 : 4;
+ F->getContents() += StringRef((const char *)&entry.r_offset, WordSize);
+ F->getContents() += StringRef((const char *)&entry.r_info, WordSize);
+
+ if (HasRelocationAddend)
+ F->getContents() += StringRef((const char *)&entry.r_addend, WordSize);
+ }
+}
+
+void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm,
+ MCAsmLayout &Layout) {
+ MCContext &Ctx = Asm.getContext();
+ MCDataFragment *F;
+
+ WriteRelocations(Asm, Layout);
+
+ const MCSection *SymtabSection;
+ unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+
+ SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ false, EntrySize);
+
+ MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+
+ SymtabSD.setAlignment(Is64Bit ? 8 : 4);
+
+ F = new MCDataFragment(&SymtabSD);
+
+ // Symbol table
+ WriteSymbolTable(F, Asm, Layout);
+ Asm.AddSectionToTheEnd(SymtabSD, Layout);
+
+ const MCSection *StrtabSection;
+ StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly(), false);
+
+ MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
+ StrtabSD.setAlignment(1);
+
+ // FIXME: This isn't right. If the sections get rearranged this will
+ // be wrong. We need a proper lookup.
+ StringTableIndex = Asm.size();
+
+ F = new MCDataFragment(&StrtabSD);
+ F->getContents().append(StringTable.begin(), StringTable.end());
+ Asm.AddSectionToTheEnd(StrtabSD, Layout);
+
+ const MCSection *ShstrtabSection;
+ ShstrtabSection = Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly(), false);
+
+ MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+ ShstrtabSD.setAlignment(1);
+
+ F = new MCDataFragment(&ShstrtabSD);
+
+ // FIXME: This isn't right. If the sections get rearranged this will
+ // be wrong. We need a proper lookup.
+ ShstrtabIndex = Asm.size();
+
+ // Section header string table.
+ //
+ // The first entry of a string table holds a null character so skip
+ // section 0.
+ uint64_t Index = 1;
+ F->getContents() += '\x00';
+
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+
+ // Remember the index into the string table so we can write it
+ // into the sh_name field of the section header table.
+ SectionStringTableIndex[&it->getSection()] = Index;
+
+ Index += Section.getSectionName().size() + 1;
+ F->getContents() += Section.getSectionName();
+ F->getContents() += '\x00';
+ }
+
+ Asm.AddSectionToTheEnd(ShstrtabSD, Layout);
+}
+
+void ELFObjectWriterImpl::WriteObject(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ CreateMetadataSections(const_cast<MCAssembler&>(Asm),
+ const_cast<MCAsmLayout&>(Layout));
+
+ // Add 1 for the null section.
+ unsigned NumSections = Asm.size() + 1;
+
+ uint64_t SectionDataSize = 0;
+
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+
+ // Get the size of the section in the output file (including padding).
+ uint64_t Size = Layout.getSectionFileSize(&SD);
+ SectionDataSize += Size;
+ }
+
+ // Write out the ELF header ...
+ WriteHeader(SectionDataSize, NumSections);
+ FileOff = Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr);
+
+ // ... then all of the sections ...
+ DenseMap<const MCSection*, uint64_t> SectionOffsetMap;
+
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+
+ unsigned Index = 1;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&it->getSection()] = FileOff;
+
+ SectionIndexMap[&it->getSection()] = Index++;
+
+ const MCSectionData &SD = *it;
+ FileOff += Layout.getSectionFileSize(&SD);
+
+ Asm.WriteSectionData(it, Layout, Writer);
+ }
+
+ // ... and then the section header table.
+ // Should we align the section header table?
+ //
+ // Null section first.
+ WriteSecHdrEntry(0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ uint64_t sh_link = 0;
+ uint64_t sh_info = 0;
+
+ switch(Section.getType()) {
+ case ELF::SHT_DYNAMIC:
+ sh_link = SectionStringTableIndex[&it->getSection()];
+ sh_info = 0;
+ break;
+
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA: {
+ const MCSection *SymtabSection;
+ const MCSection *InfoSection;
+
+ SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ false);
+ sh_link = SectionIndexMap[SymtabSection];
+
+ // Remove ".rel" and ".rela" prefixes.
+ unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
+ StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+
+ InfoSection = Asm.getContext().getELFSection(SectionName,
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly(),
+ false);
+ sh_info = SectionIndexMap[InfoSection];
+ break;
+ }
+
+ case ELF::SHT_SYMTAB:
+ case ELF::SHT_DYNSYM:
+ sh_link = StringTableIndex;
+ sh_info = LastLocalSymbolIndex;
+ break;
+
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_STRTAB:
+ case ELF::SHT_NOBITS:
+ case ELF::SHT_NULL:
+ // Nothing to do.
+ break;
+
+ case ELF::SHT_HASH:
+ case ELF::SHT_GROUP:
+ case ELF::SHT_SYMTAB_SHNDX:
+ default:
+ assert(0 && "FIXME: sh_type value not supported!");
+ break;
+ }
+
+ WriteSecHdrEntry(SectionStringTableIndex[&it->getSection()],
+ Section.getType(), Section.getFlags(),
+ Layout.getSectionAddress(&SD),
+ SectionOffsetMap.lookup(&SD.getSection()),
+ Layout.getSectionSize(&SD), sh_link,
+ sh_info, SD.getAlignment(),
+ Section.getEntrySize());
+ }
+}
+
+ELFObjectWriter::ELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ bool IsLittleEndian,
+ bool HasRelocationAddend)
+ : MCObjectWriter(OS, IsLittleEndian)
+{
+ Impl = new ELFObjectWriterImpl(this, Is64Bit, HasRelocationAddend);
+}
+
+ELFObjectWriter::~ELFObjectWriter() {
+ delete (ELFObjectWriterImpl*) Impl;
+}
+
+void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+ ((ELFObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm);
+}
+
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ ((ELFObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+}
+
+void ELFObjectWriter::WriteObject(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ ((ELFObjectWriterImpl*) Impl)->WriteObject(Asm, Layout);
+}
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index a275be2c53c5..670b2e9b292a 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -68,7 +68,9 @@ MCAsmInfo::MCAsmInfo() {
ExceptionsType = ExceptionHandling::None;
DwarfRequiresFrameSection = true;
DwarfUsesInlineInfoSection = false;
+ DwarfUsesAbsoluteLabelForStmtList = true;
DwarfSectionOffsetDirective = 0;
+ DwarfUsesLabelOffsetForRanges = true;
HasMicrosoftFastStdCallMangling = false;
AsmTransCBE = 0;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 0bd3b2d001e8..e0e261a63c70 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -44,5 +44,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
+
+ DwarfUsesAbsoluteLabelForStmtList = false;
+ DwarfUsesLabelOffsetForRanges = false;
}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e272b60c4475..1cc8fb0b5486 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -31,7 +31,7 @@ class MCAsmStreamer : public MCStreamer {
formatted_raw_ostream &OS;
const MCAsmInfo &MAI;
OwningPtr<MCInstPrinter> InstPrinter;
- MCCodeEmitter *Emitter;
+ OwningPtr<MCCodeEmitter> Emitter;
SmallString<128> CommentToEmit;
raw_svector_ostream CommentStream;
@@ -217,6 +217,7 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
void MCAsmStreamer::SwitchSection(const MCSection *Section) {
assert(Section && "Cannot switch to a null section!");
if (Section != CurSection) {
+ PrevSection = CurSection;
CurSection = Section;
Section->PrintSwitchToSection(MAI, OS);
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 7d8455492780..f0e1d7fbc21c 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -178,8 +178,12 @@ uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
MCFragment::MCFragment() : Kind(FragmentType(~0)) {
}
+MCFragment::~MCFragment() {
+}
+
MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
- : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0))
+ : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)),
+ EffectiveSize(~UINT64_C(0))
{
if (Parent)
Parent->getFragmentList().push_back(this);
@@ -207,7 +211,8 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
uint64_t _Offset, MCAssembler *A)
: Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
IsExternal(false), IsPrivateExtern(false),
- CommonSize(0), CommonAlign(0), Flags(0), Index(0)
+ CommonSize(0), SymbolSize(0), CommonAlign(0),
+ Flags(0), Index(0)
{
if (A)
A->getSymbolList().push_back(this);
@@ -623,8 +628,23 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
switch (it->getKind()) {
default:
assert(0 && "Invalid fragment in virtual section!");
+ case MCFragment::FT_Data: {
+ // Check that we aren't trying to write a non-zero contents (or fixups)
+ // into a virtual section. This is to support clients which use standard
+ // directives to fill the contents of virtual sections.
+ MCDataFragment &DF = cast<MCDataFragment>(*it);
+ assert(DF.fixup_begin() == DF.fixup_end() &&
+ "Cannot have fixups in virtual section!");
+ for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
+ assert(DF.getContents()[i] == 0 &&
+ "Invalid data value for virtual section!");
+ break;
+ }
case MCFragment::FT_Align:
- assert(!cast<MCAlignFragment>(it)->getValueSize() &&
+ // Check that we aren't trying to write a non-zero value into a virtual
+ // section.
+ assert((!cast<MCAlignFragment>(it)->getValueSize() ||
+ !cast<MCAlignFragment>(it)->getValue()) &&
"Invalid align in virtual section!");
break;
case MCFragment::FT_Fill:
@@ -647,7 +667,41 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD));
}
-void MCAssembler::Finish() {
+void MCAssembler::AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout) {
+ // Create dummy fragments and assign section ordinals.
+ unsigned SectionIndex = 0;
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it)
+ SectionIndex++;
+
+ SD.setOrdinal(SectionIndex);
+
+ // Assign layout order indices to sections and fragments.
+ unsigned FragmentIndex = 0;
+ unsigned i = 0;
+ for (unsigned e = Layout.getSectionOrder().size(); i != e; ++i) {
+ MCSectionData *SD = Layout.getSectionOrder()[i];
+
+ for (MCSectionData::iterator it2 = SD->begin(),
+ ie2 = SD->end(); it2 != ie2; ++it2)
+ FragmentIndex++;
+ }
+
+ SD.setLayoutOrder(i);
+ for (MCSectionData::iterator it2 = SD.begin(),
+ ie2 = SD.end(); it2 != ie2; ++it2) {
+ it2->setLayoutOrder(FragmentIndex++);
+ }
+ Layout.getSectionOrder().push_back(&SD);
+
+ Layout.LayoutSection(&SD);
+
+ // Layout until everything fits.
+ while (LayoutOnce(Layout))
+ continue;
+
+}
+
+void MCAssembler::Finish(MCObjectWriter *Writer) {
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - pre-layout\n--\n";
dump(); });
@@ -717,9 +771,15 @@ void MCAssembler::Finish() {
dump(); });
uint64_t StartOffset = OS.tell();
- llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS));
- if (!Writer)
- report_fatal_error("unable to create object writer!");
+
+ llvm::OwningPtr<MCObjectWriter> OwnWriter(0);
+ if (Writer == 0) {
+ //no custom Writer_ : create the default one life-managed by OwningPtr
+ OwnWriter.reset(getBackend().createObjectWriter(OS));
+ Writer = OwnWriter.get();
+ if (!Writer)
+ report_fatal_error("unable to create object writer!");
+ }
// Allow the object writer a chance to perform post-layout binding (for
// example, to set the index fields in the symbol data).
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 11370642530a..e5586a0d7c31 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCLabel.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -23,7 +24,8 @@ typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
+MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0),
+ CurrentDwarfLoc(0,0,0,0,0) {
MachOUniquingMap = 0;
ELFUniquingMap = 0;
COFFUniquingMap = 0;
@@ -31,6 +33,8 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
SecureLogFile = getenv("AS_SECURE_LOG_FILE");
SecureLog = 0;
SecureLogUsed = false;
+
+ DwarfLocSeen = false;
}
MCContext::~MCContext() {
@@ -147,7 +151,7 @@ getMachOSection(StringRef Segment, StringRef Section,
const MCSection *MCContext::
getELFSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind, bool IsExplicit) {
+ SectionKind Kind, bool IsExplicit, unsigned EntrySize) {
if (ELFUniquingMap == 0)
ELFUniquingMap = new ELFUniqueMapTy();
ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
@@ -157,7 +161,7 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
if (Entry.getValue()) return Entry.getValue();
MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
- Kind, IsExplicit);
+ Kind, IsExplicit, EntrySize);
Entry.setValue(Result);
return Result;
}
@@ -181,3 +185,81 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
Entry.setValue(Result);
return Result;
}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Management
+//===----------------------------------------------------------------------===//
+
+/// GetDwarfFile - takes a file name an number to place in the dwarf file and
+/// directory tables. If the file number has already been allocated it is an
+/// error and zero is returned and the client reports the error, else the
+/// allocated file number is returned. The file numbers may be in any order.
+unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
+ // TODO: a FileNumber of zero says to use the next available file number.
+ // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
+ // to not be less than one. This needs to be change to be not less than zero.
+
+ // Make space for this FileNumber in the MCDwarfFiles vector if needed.
+ if (FileNumber >= MCDwarfFiles.size()) {
+ MCDwarfFiles.resize(FileNumber + 1);
+ } else {
+ MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
+ if (ExistingFile)
+ // It is an error to use see the same number more than once.
+ return 0;
+ }
+
+ // Get the new MCDwarfFile slot for this FileNumber.
+ MCDwarfFile *&File = MCDwarfFiles[FileNumber];
+
+ // Separate the directory part from the basename of the FileName.
+ std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+
+ // Find or make a entry in the MCDwarfDirs vector for this Directory.
+ StringRef Name;
+ unsigned DirIndex;
+ // Capture directory name.
+ if (Slash.second.empty()) {
+ Name = Slash.first;
+ DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used.
+ } else {
+ StringRef Directory = Slash.first;
+ Name = Slash.second;
+ for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) {
+ if (Directory == MCDwarfDirs[DirIndex])
+ break;
+ }
+ if (DirIndex >= MCDwarfDirs.size()) {
+ char *Buf = static_cast<char *>(Allocate(Directory.size()));
+ memcpy(Buf, Directory.data(), Directory.size());
+ MCDwarfDirs.push_back(StringRef(Buf, Directory.size()));
+ }
+ // The DirIndex is one based, as DirIndex of 0 is used for FileNames with
+ // no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the
+ // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames are
+ // stored at MCDwarfFiles[FileNumber].Name .
+ DirIndex++;
+ }
+
+ // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
+ // vector.
+ char *Buf = static_cast<char *>(Allocate(Name.size()));
+ memcpy(Buf, Name.data(), Name.size());
+ File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex);
+
+ // return the allocated FileNumber.
+ return FileNumber;
+}
+
+/// ValidateDwarfFileNumber - takes a dwarf file number and returns true if it
+/// currently is assigned and false otherwise.
+bool MCContext::ValidateDwarfFileNumber(unsigned FileNumber) {
+ if(FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
+ return false;
+
+ MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
+ if (ExistingFile)
+ return true;
+ else
+ return false;
+}
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
new file mode 100644
index 000000000000..5fa7b70194b2
--- /dev/null
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+add_llvm_library(LLVMMCDisassembler
+ EDDisassembler.cpp
+ EDOperand.cpp
+ EDInst.cpp
+ EDToken.cpp
+ )
diff --git a/tools/edis/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index 85e41e61af9f..697b3d9c0515 100644
--- a/tools/edis/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -15,9 +15,6 @@
#include "EDDisassembler.h"
#include "EDInst.h"
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -27,7 +24,6 @@
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -39,7 +35,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSelect.h"
-
using namespace llvm;
bool EDDisassembler::sInitialized = false;
@@ -80,22 +75,22 @@ static const char *tripleFromArch(Triple::ArchType arch) {
/// @arg arch - The target architecture
/// @arg syntax - The assembly syntax in sd form
static int getLLVMSyntaxVariant(Triple::ArchType arch,
- EDAssemblySyntax_t syntax) {
+ EDDisassembler::AssemblySyntax syntax) {
switch (syntax) {
default:
return -1;
// Mappings below from X86AsmPrinter.cpp
- case kEDAssemblySyntaxX86ATT:
+ case EDDisassembler::kEDAssemblySyntaxX86ATT:
if (arch == Triple::x86 || arch == Triple::x86_64)
return 0;
else
return -1;
- case kEDAssemblySyntaxX86Intel:
+ case EDDisassembler::kEDAssemblySyntaxX86Intel:
if (arch == Triple::x86 || arch == Triple::x86_64)
return 1;
else
return -1;
- case kEDAssemblySyntaxARMUAL:
+ case EDDisassembler::kEDAssemblySyntaxARMUAL:
if (arch == Triple::arm || arch == Triple::thumb)
return 0;
else
@@ -119,7 +114,7 @@ void EDDisassembler::initialize() {
#undef BRINGUP_TARGET
EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
- EDAssemblySyntax_t syntax) {
+ AssemblySyntax syntax) {
CPUKey key;
key.Arch = arch;
key.Syntax = syntax;
@@ -144,10 +139,8 @@ EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
}
EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
- EDAssemblySyntax_t syntax) {
- Triple triple(str);
-
- return getDisassembler(triple.getArch(), syntax);
+ AssemblySyntax syntax) {
+ return getDisassembler(Triple(str).getArch(), syntax);
}
EDDisassembler::EDDisassembler(CPUKey &key) :
@@ -176,11 +169,10 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
std::string featureString;
- OwningPtr<const TargetMachine>
- targetMachine(Tgt->createTargetMachine(tripleString,
- featureString));
+ TargetMachine.reset(Tgt->createTargetMachine(tripleString,
+ featureString));
- const TargetRegisterInfo *registerInfo = targetMachine->getRegisterInfo();
+ const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
if (!registerInfo)
return;
@@ -210,7 +202,7 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
- initMaps(*targetMachine->getRegisterInfo());
+ initMaps(*TargetMachine->getRegisterInfo());
Valid = true;
}
@@ -364,11 +356,14 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
MCContext context(*AsmInfo);
OwningPtr<MCStreamer> streamer(createNullStreamer(context));
- AsmParser genericParser(*Tgt, sourceMgr, context, *streamer, *AsmInfo);
- OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(genericParser));
-
- AsmToken OpcodeToken = genericParser.Lex();
- AsmToken NextToken = genericParser.Lex(); // consume next token, because specificParser expects us to
+ OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
+ context, *streamer,
+ *AsmInfo));
+ OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser,
+ *TargetMachine));
+
+ AsmToken OpcodeToken = genericParser->Lex();
+ AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
if (OpcodeToken.is(AsmToken::Identifier)) {
instName = OpcodeToken.getString();
diff --git a/tools/edis/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 74a260e82532..e2f850bcdba9 100644
--- a/tools/edis/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -1,4 +1,4 @@
-//===-EDDisassembler.h - LLVM Enhanced Disassembler -------------*- C++ -*-===//
+//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,12 +13,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef EDDisassembler_
-#define EDDisassembler_
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
-#include "EDInfo.inc"
-
-#include "llvm-c/EnhancedDisassembly.h"
+#include "EDInfo.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Triple.h"
@@ -27,7 +25,6 @@
#include <map>
#include <set>
-#include <string>
#include <vector>
namespace llvm {
@@ -47,14 +44,28 @@ class MCStreamer;
template <typename T> class SmallVectorImpl;
class SourceMgr;
class Target;
+class TargetMachine;
class TargetRegisterInfo;
struct EDInstInfo;
-}
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
/// EDDisassembler - Encapsulates a disassembler for a single architecture and
/// disassembly syntax. Also manages the static disassembler registry.
struct EDDisassembler {
+ typedef enum {
+ /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86Intel = 0,
+ /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86ATT = 1,
+ kEDAssemblySyntaxARMUAL = 2
+ } AssemblySyntax;
+
+
////////////////////
// Static members //
////////////////////
@@ -66,7 +77,7 @@ struct EDDisassembler {
llvm::Triple::ArchType Arch;
/// The assembly syntax
- EDAssemblySyntax_t Syntax;
+ AssemblySyntax Syntax;
/// operator== - Equality operator
bool operator==(const CPUKey &key) const {
@@ -97,7 +108,7 @@ struct EDDisassembler {
/// @arg arch - The desired architecture
/// @arg syntax - The desired disassembly syntax
static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
- EDAssemblySyntax_t syntax);
+ AssemblySyntax syntax);
/// getDisassembler - Returns the disassembler for a given combination of
/// CPU type, CPU subtype, and assembly syntax, or NULL on failure
@@ -106,7 +117,7 @@ struct EDDisassembler {
/// "x86_64-apple-darwin"
/// @arg syntax - The disassembly syntax for the required disassembler
static EDDisassembler *getDisassembler(llvm::StringRef str,
- EDAssemblySyntax_t syntax);
+ AssemblySyntax syntax);
/// initialize - Initializes the disassembler registry and the LLVM backend
static void initialize();
@@ -127,6 +138,8 @@ struct EDDisassembler {
CPUKey Key;
/// The LLVM target corresponding to the disassembler
const llvm::Target *Tgt;
+ /// The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
/// The assembly information for the target architecture
llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
/// The disassembler for the target architecture
@@ -253,4 +266,6 @@ struct EDDisassembler {
int llvmSyntaxVariant() const;
};
+} // end namespace llvm
+
#endif
diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h
new file mode 100644
index 000000000000..627c06641dbc
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDInfo.h
@@ -0,0 +1,73 @@
+//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINFO_H
+#define LLVM_EDINFO_H
+
+enum {
+ EDIS_MAX_OPERANDS = 13,
+ EDIS_MAX_SYNTAXES = 2
+};
+
+enum OperandTypes {
+ kOperandTypeNone,
+ kOperandTypeImmediate,
+ kOperandTypeRegister,
+ kOperandTypeX86Memory,
+ kOperandTypeX86EffectiveAddress,
+ kOperandTypeX86PCRelative,
+ kOperandTypeARMBranchTarget,
+ kOperandTypeARMSoReg,
+ kOperandTypeARMSoImm,
+ kOperandTypeARMSoImm2Part,
+ kOperandTypeARMPredicate,
+ kOperandTypeARMAddrMode2,
+ kOperandTypeARMAddrMode2Offset,
+ kOperandTypeARMAddrMode3,
+ kOperandTypeARMAddrMode3Offset,
+ kOperandTypeARMAddrMode4,
+ kOperandTypeARMAddrMode5,
+ kOperandTypeARMAddrMode6,
+ kOperandTypeARMAddrMode6Offset,
+ kOperandTypeARMAddrModePC,
+ kOperandTypeARMRegisterList,
+ kOperandTypeARMTBAddrMode,
+ kOperandTypeThumbITMask,
+ kOperandTypeThumbAddrModeS1,
+ kOperandTypeThumbAddrModeS2,
+ kOperandTypeThumbAddrModeS4,
+ kOperandTypeThumbAddrModeRR,
+ kOperandTypeThumbAddrModeSP,
+ kOperandTypeThumb2SoReg,
+ kOperandTypeThumb2SoImm,
+ kOperandTypeThumb2AddrModeImm8,
+ kOperandTypeThumb2AddrModeImm8Offset,
+ kOperandTypeThumb2AddrModeImm12,
+ kOperandTypeThumb2AddrModeSoReg,
+ kOperandTypeThumb2AddrModeImm8s4,
+ kOperandTypeThumb2AddrModeImm8s4Offset
+};
+
+enum OperandFlags {
+ kOperandFlagSource = 0x1,
+ kOperandFlagTarget = 0x2
+};
+
+enum InstructionTypes {
+ kInstructionTypeNone,
+ kInstructionTypeMove,
+ kInstructionTypeBranch,
+ kInstructionTypePush,
+ kInstructionTypePop,
+ kInstructionTypeCall,
+ kInstructionTypeReturn
+};
+
+
+#endif
diff --git a/tools/edis/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp
index c009f0f86871..e22408f060b1 100644
--- a/tools/edis/EDInst.cpp
+++ b/lib/MC/MCDisassembler/EDInst.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "EDDisassembler.h"
#include "EDInst.h"
+#include "EDDisassembler.h"
#include "EDOperand.h"
#include "EDToken.h"
diff --git a/tools/edis/EDInst.h b/lib/MC/MCDisassembler/EDInst.h
index c8a747ff99f9..39d264fb7aad 100644
--- a/tools/edis/EDInst.h
+++ b/lib/MC/MCDisassembler/EDInst.h
@@ -1,4 +1,4 @@
-//===-EDInst.h - LLVM Enhanced Disassembler ---------------------*- C++ -*-===//
+//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,19 +13,24 @@
//
//===----------------------------------------------------------------------===//
-#ifndef EDInst_
-#define EDInst_
-
-#include "llvm-c/EnhancedDisassembly.h"
+#ifndef LLVM_EDINST_H
+#define LLVM_EDINST_H
+#include "llvm/System/DataTypes.h"
#include "llvm/ADT/SmallVector.h"
-
#include <string>
#include <vector>
namespace llvm {
+ class MCInst;
struct EDInstInfo;
-}
+ struct EDToken;
+ struct EDDisassembler;
+ struct EDOperand;
+
+#ifdef __BLOCKS__
+ typedef int (^EDTokenVisitor_t)(EDToken *token);
+#endif
/// CachedResult - Encapsulates the result of a function along with the validity
/// of that result, so that slow functions don't need to run twice
@@ -172,4 +177,6 @@ struct EDInst {
#endif
};
+} // end namespace llvm
+
#endif
diff --git a/tools/edis/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index d63c1c6bfbf9..2aed123368da 100644
--- a/tools/edis/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -1,4 +1,4 @@
-//===-EDOperand.cpp - LLVM Enhanced Disassembler --------------------------===//
+//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,11 @@
//
//===----------------------------------------------------------------------===//
+#include "EDOperand.h"
#include "EDDisassembler.h"
#include "EDInst.h"
-#include "EDOperand.h"
-
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
-
using namespace llvm;
EDOperand::EDOperand(const EDDisassembler &disassembler,
@@ -263,7 +261,7 @@ int EDOperand::isMemory() {
#ifdef __BLOCKS__
struct RegisterReaderWrapper {
- EDRegisterBlock_t regBlock;
+ EDOperand::EDRegisterBlock_t regBlock;
};
int readerWrapperCallback(uint64_t *value,
diff --git a/tools/edis/EDOperand.h b/lib/MC/MCDisassembler/EDOperand.h
index ad9345b758bd..6e695224318c 100644
--- a/tools/edis/EDOperand.h
+++ b/lib/MC/MCDisassembler/EDOperand.h
@@ -13,10 +13,19 @@
//
//===----------------------------------------------------------------------===//
-#ifndef EDOperand_
-#define EDOperand_
+#ifndef LLVM_EDOPERAND_H
+#define LLVM_EDOPERAND_H
+
+#include "llvm/System/DataTypes.h"
+
+namespace llvm {
+
+struct EDDisassembler;
+struct EDInst;
+
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
+ void* arg);
-#include "llvm-c/EnhancedDisassembly.h"
/// EDOperand - Encapsulates a single operand, which can be evaluated by the
/// client
@@ -69,10 +78,14 @@ struct EDOperand {
int isMemory();
#ifdef __BLOCKS__
+ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
+
/// evaluate - Like evaluate for a callback, but uses a block instead
int evaluate(uint64_t &result,
EDRegisterBlock_t regBlock);
#endif
};
+} // end namespace llvm
+
#endif
diff --git a/tools/edis/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp
index 3bcb0a14b8c4..400e1649e970 100644
--- a/tools/edis/EDToken.cpp
+++ b/lib/MC/MCDisassembler/EDToken.cpp
@@ -1,4 +1,4 @@
-//===-EDToken.cpp - LLVM Enhanced Disassembler ----------------------------===//
+//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,11 @@
//
//===----------------------------------------------------------------------===//
-#include "EDDisassembler.h"
#include "EDToken.h"
-
-#include "llvm/ADT/SmallVector.h"
+#include "EDDisassembler.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
EDToken::EDToken(StringRef str,
diff --git a/tools/edis/EDToken.h b/lib/MC/MCDisassembler/EDToken.h
index e4ae91f7ec3a..6b2aeac60ba5 100644
--- a/tools/edis/EDToken.h
+++ b/lib/MC/MCDisassembler/EDToken.h
@@ -13,15 +13,18 @@
//
//===----------------------------------------------------------------------===//
-#ifndef EDToken_
-#define EDToken_
+#ifndef LLVM_EDTOKEN_H
+#define LLVM_EDTOKEN_H
-#include "llvm-c/EnhancedDisassembly.h"
#include "llvm/ADT/StringRef.h"
-
+#include "llvm/System/DataTypes.h"
#include <string>
#include <vector>
+namespace llvm {
+
+struct EDDisassembler;
+
/// EDToken - Encapsulates a single token, which can provide a string
/// representation of itself or interpret itself in various ways, depending
/// on the token type.
@@ -132,4 +135,5 @@ struct EDToken {
int getString(const char*& buf);
};
+} // end namespace llvm
#endif
diff --git a/lib/MC/MCDisassembler/Makefile b/lib/MC/MCDisassembler/Makefile
new file mode 100644
index 000000000000..7d71cd381a7c
--- /dev/null
+++ b/lib/MC/MCDisassembler/Makefile
@@ -0,0 +1,14 @@
+##===- lib/MC/MCDisassembler/Makefile ----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCDisassembler
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
new file mode 100644
index 000000000000..2da71f96c676
--- /dev/null
+++ b/lib/MC/MCDwarf.cpp
@@ -0,0 +1,21 @@
+//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCDwarfFile::print(raw_ostream &OS) const {
+ OS << '"' << getName() << '"';
+}
+
+void MCDwarfFile::dump() const {
+ print(dbgs());
+}
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
new file mode 100644
index 000000000000..570c3917ab46
--- /dev/null
+++ b/lib/MC/MCELFStreamer.cpp
@@ -0,0 +1,408 @@
+//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCELFStreamer : public MCObjectStreamer {
+ void EmitInstToFragment(const MCInst &Inst);
+ void EmitInstToData(const MCInst &Inst);
+public:
+ MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ ~MCELFStreamer() {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
+ virtual void EmitGPRel32Value(const MCExpr *Value) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ DEBUG(dbgs() << "FIXME: MCELFStreamer:EmitDwarfFileDirective not implemented\n");
+ }
+
+ virtual void EmitInstruction(const MCInst &Inst);
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+
+ Symbol->setSection(*CurSection);
+}
+
+void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ }
+
+ assert(0 && "invalid assembler flag!");
+}
+
+void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol, note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+ // leaves much to desired. It doesn't really make sense to arbitrarily add and
+ // remove flags, but 'as' allows this (in particular, see .desc).
+ //
+ // In the future it might be worth trying to make these operations more well
+ // defined.
+ switch (Attribute) {
+ case MCSA_LazyReference:
+ case MCSA_Reference:
+ case MCSA_NoDeadStrip:
+ case MCSA_PrivateExtern:
+ case MCSA_WeakDefinition:
+ case MCSA_WeakDefAutoPrivate:
+ case MCSA_Invalid:
+ case MCSA_ELF_TypeIndFunction:
+ case MCSA_IndirectSymbol:
+ assert(0 && "Invalid symbol attribute for ELF!");
+ break;
+
+ case MCSA_Global:
+ SD.setFlags(SD.getFlags() | ELF_STB_Global);
+ SD.setExternal(true);
+ break;
+
+ case MCSA_WeakReference:
+ case MCSA_Weak:
+ SD.setFlags(SD.getFlags() | ELF_STB_Weak);
+ break;
+
+ case MCSA_Local:
+ SD.setFlags(SD.getFlags() | ELF_STB_Local);
+ break;
+
+ case MCSA_ELF_TypeFunction:
+ SD.setFlags(SD.getFlags() | ELF_STT_Func);
+ break;
+
+ case MCSA_ELF_TypeObject:
+ SD.setFlags(SD.getFlags() | ELF_STT_Object);
+ break;
+
+ case MCSA_ELF_TypeTLS:
+ SD.setFlags(SD.getFlags() | ELF_STT_Tls);
+ break;
+
+ case MCSA_ELF_TypeCommon:
+ SD.setFlags(SD.getFlags() | ELF_STT_Common);
+ break;
+
+ case MCSA_ELF_TypeNoType:
+ SD.setFlags(SD.getFlags() | ELF_STT_Notype);
+ break;
+
+ case MCSA_Protected:
+ SD.setFlags(SD.getFlags() | ELF_STV_Protected);
+ break;
+
+ case MCSA_Hidden:
+ SD.setFlags(SD.getFlags() | ELF_STV_Hidden);
+ break;
+
+ case MCSA_Internal:
+ SD.setFlags(SD.getFlags() | ELF_STV_Internal);
+ break;
+ }
+}
+
+void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ if ((SD.getFlags() & (0xf << ELF_STB_Shift)) == ELF_STB_Local) {
+ const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
+ MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE |
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD.setFragment(F);
+ Symbol->setSection(*Section);
+ SD.setSize(MCConstantExpr::Create(Size, getContext()));
+ }
+
+ SD.setFlags(SD.getFlags() | ELF_STB_Global);
+ SD.setExternal(true);
+
+ SD.setCommon(Size, ByteAlignment);
+}
+
+void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCELFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
+ // FIXME: Endianness assumption.
+ for (unsigned i = 0; i != Size; ++i)
+ DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
+ } else {
+ DF->addFixup(MCFixup::Create(DF->getContents().size(), AddValueSymbols(Value),
+ MCFixup::getKindForSize(Size)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+ }
+}
+
+void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCELFStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ // TODO: This is exactly the same as MCMachOStreamer. Consider merging into
+ // MCObjectStreamer.
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
+// Add a symbol for the file name of this module. This is the second
+// entry in the module's symbol table (the first being the null symbol).
+void MCELFStreamer::EmitFileDirective(StringRef Filename) {
+ MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
+ Symbol->setSection(*CurSection);
+ Symbol->setAbsolute();
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
+}
+
+void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done, we may be
+ // able to get away with not storing any extra data in the MCInst.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ IF->getCode() = Code;
+ IF->getFixups() = Fixups;
+}
+
+void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCELFStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = 0; i != Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ // If this instruction doesn't need relaxation, just emit it as data.
+ if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+ EmitInstToData(Inst);
+ return;
+ }
+
+ // Otherwise, if we are relaxing everything, relax the instruction as much as
+ // possible and emit it as data.
+ if (getAssembler().getRelaxAll()) {
+ MCInst Relaxed;
+ getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+ EmitInstToData(Relaxed);
+ return;
+ }
+
+ // Otherwise emit to a separate fragment.
+ EmitInstToFragment(Inst);
+}
+
+void MCELFStreamer::Finish() {
+ getAssembler().Finish();
+}
+
+MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 44bc267c11c2..671874df2c69 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -18,6 +18,8 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmBackend.h"
@@ -28,58 +30,19 @@ namespace {
class MCMachOStreamer : public MCObjectStreamer {
private:
- MCFragment *getCurrentFragment() const {
- assert(getCurrentSectionData() && "No current section!");
-
- if (!getCurrentSectionData()->empty())
- return &getCurrentSectionData()->getFragmentList().back();
-
- return 0;
- }
-
- /// Get a data fragment to write into, creating a new one if the current
- /// fragment is not a data fragment.
- MCDataFragment *getOrCreateDataFragment() const {
- MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- if (!F)
- F = new MCDataFragment(getCurrentSectionData());
- return F;
- }
-
void EmitInstToFragment(const MCInst &Inst);
void EmitInstToData(const MCInst &Inst);
+ // FIXME: These will likely moved to a better place.
+ void MakeLineEntryForSection(const MCSection *Section);
+ const MCExpr * MakeStartMinusEndExpr(MCSymbol *Start, MCSymbol *End,
+ int IntVal);
+ void EmitDwarfFileTable(void);
public:
MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter)
: MCObjectStreamer(Context, TAB, OS, Emitter) {}
- const MCExpr *AddValueSymbols(const MCExpr *Value) {
- switch (Value->getKind()) {
- case MCExpr::Target: assert(0 && "Can't handle target exprs yet!");
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols(BE->getLHS());
- AddValueSymbols(BE->getRHS());
- break;
- }
-
- case MCExpr::SymbolRef:
- getAssembler().getOrCreateSymbolData(
- cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
- break;
- }
-
- return Value;
- }
-
/// @name MCStreamer Interface
/// @{
@@ -126,10 +89,16 @@ public:
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename) {
- report_fatal_error("unsupported directive: '.file'");
+ // FIXME: Just ignore the .file; it isn't important enough to fail the
+ // entire assembly.
+
+ //report_fatal_error("unsupported directive: '.file'");
}
virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
- report_fatal_error("unsupported directive: '.file'");
+ // FIXME: Just ignore the .file; it isn't important enough to fail the
+ // entire assembly.
+
+ //report_fatal_error("unsupported directive: '.file'");
}
virtual void EmitInstruction(const MCInst &Inst);
@@ -142,6 +111,8 @@ public:
} // end anonymous namespace.
void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+ // TODO: This is almost exactly the same as WinCOFFStreamer. Consider merging
+ // into MCObjectStreamer.
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(CurSection && "Cannot emit before setting section!");
@@ -185,6 +156,8 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
}
void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
// FIXME: Lift context changes into super class.
getAssembler().getOrCreateSymbolData(*Symbol);
Symbol->setVariableValue(AddValueSymbols(Value));
@@ -335,11 +308,15 @@ void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
}
void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
MCDataFragment *DF = getOrCreateDataFragment();
// Avoid fixups when possible.
@@ -359,6 +336,8 @@ void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
@@ -371,6 +350,8 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
@@ -429,6 +410,10 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
getCurrentSectionData()->setHasInstructions(true);
+ // Now that a machine instruction has been assembled into this section, make
+ // a line entry for any .loc directive that has been seen.
+ MakeLineEntryForSection(getCurrentSection());
+
// If this instruction doesn't need relaxation, just emit it as data.
if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
EmitInstToData(Inst);
@@ -450,7 +435,207 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
EmitInstToFragment(Inst);
}
+//
+// This is called when an instruction is assembled into the specified section
+// and if there is information from the last .loc directive that has yet to have
+// a line entry made for it is made.
+//
+void MCMachOStreamer::MakeLineEntryForSection(const MCSection *Section) {
+ if (!getContext().getDwarfLocSeen())
+ return;
+
+ // Create a symbol at in the current section for use in the line entry.
+ MCSymbol *LineSym = getContext().CreateTempSymbol();
+ // Set the value of the symbol to use for the MCLineEntry.
+ EmitLabel(LineSym);
+
+ // Get the current .loc info saved in the context.
+ const MCDwarfLoc &DwarfLoc = getContext().getCurrentDwarfLoc();
+
+ // Create a (local) line entry with the symbol and the current .loc info.
+ MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+ // clear DwarfLocSeen saying the current .loc info is now used.
+ getContext().clearDwarfLocSeen();
+
+ // Get the MCLineSection for this section, if one does not exist for this
+ // section create it.
+ DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ getContext().getMCLineSections();
+ MCLineSection *LineSection = MCLineSections[Section];
+ if (!LineSection) {
+ // Create a new MCLineSection. This will be deleted after the dwarf line
+ // table is created using it by iterating through the MCLineSections
+ // DenseMap.
+ LineSection = new MCLineSection;
+ // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+ MCLineSections[Section] = LineSection;
+ }
+
+ // Add the line entry to this section's entries.
+ LineSection->addLineEntry(LineEntry);
+}
+
+//
+// This helper routine returns an expression of End - Start + IntVal for use
+// by EmitDwarfFileTable() below.
+//
+const MCExpr * MCMachOStreamer::MakeStartMinusEndExpr(MCSymbol *Start,
+ MCSymbol *End,
+ int IntVal) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(End, Variant, getContext());
+ const MCExpr *RHS =
+ MCSymbolRefExpr::Create(Start, Variant, getContext());
+ const MCExpr *Res1 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS,getContext());
+ const MCExpr *Res2 =
+ MCConstantExpr::Create(IntVal, getContext());
+ const MCExpr *Res3 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, getContext());
+ return Res3;
+}
+
+//
+// This emits the Dwarf file (and eventually the line) table.
+//
+void MCMachOStreamer::EmitDwarfFileTable(void) {
+ // For now make sure we don't put out the Dwarf file table if no .file
+ // directives were seen.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ if (MCDwarfFiles.size() == 0)
+ return;
+
+ // This is the Mach-O section, for ELF it is the .debug_line section.
+ SwitchSection(getContext().getMachOSection("__DWARF", "__debug_line",
+ MCSectionMachO::S_ATTR_DEBUG,
+ 0, SectionKind::getDataRelLocal()));
+
+ // Create a symbol at the beginning of this section.
+ MCSymbol *LineStartSym = getContext().CreateTempSymbol();
+ // Set the value of the symbol, as we are at the start of the section.
+ EmitLabel(LineStartSym);
+
+ // Create a symbol for the end of the section (to be set when we get there).
+ MCSymbol *LineEndSym = getContext().CreateTempSymbol();
+
+ // The first 4 bytes is the total length of the information for this
+ // compilation unit (not including these 4 bytes for the length).
+ EmitValue(MakeStartMinusEndExpr(LineStartSym, LineEndSym, 4), 4, 0);
+
+ // Next 2 bytes is the Version, which is Dwarf 2.
+ EmitIntValue(2, 2);
+
+ // Create a symbol for the end of the prologue (to be set when we get there).
+ MCSymbol *ProEndSym = getContext().CreateTempSymbol(); // Lprologue_end
+
+ // Length of the prologue, is the next 4 bytes. Which is the start of the
+ // section to the end of the prologue. Not including the 4 bytes for the
+ // total length, the 2 bytes for the version, and these 4 bytes for the
+ // length of the prologue.
+ EmitValue(MakeStartMinusEndExpr(LineStartSym, ProEndSym, (4 + 2 + 4)), 4, 0);
+
+ // Parameters of the state machine, are next.
+ // Define the architecture-dependent minimum instruction length (in
+ // bytes). This value should be rather too small than too big. */
+ // DWARF2_LINE_MIN_INSN_LENGTH
+ EmitIntValue(1, 1);
+ // Flag that indicates the initial value of the is_stmt_start flag.
+ // DWARF2_LINE_DEFAULT_IS_STMT
+ EmitIntValue(1, 1);
+ // Minimum line offset in a special line info. opcode. This value
+ // was chosen to give a reasonable range of values. */
+ // DWARF2_LINE_BASE
+ EmitIntValue(uint64_t(-5), 1);
+ // Range of line offsets in a special line info. opcode.
+ // DWARF2_LINE_RANGE
+ EmitIntValue(14, 1);
+ // First special line opcode - leave room for the standard opcodes.
+ // DWARF2_LINE_OPCODE_BASE
+ EmitIntValue(13, 1);
+
+ // Standard opcode lengths
+ EmitIntValue(0, 1); // length of DW_LNS_copy
+ EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+ EmitIntValue(1, 1); // length of DW_LNS_advance_line
+ EmitIntValue(1, 1); // length of DW_LNS_set_file
+ EmitIntValue(1, 1); // length of DW_LNS_set_column
+ EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+ EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+ EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+ EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+ EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+ EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+ EmitIntValue(1, 1); // DW_LNS_set_isa
+
+ // Put out the directory and file tables.
+
+ // First the directory table.
+ const std::vector<StringRef> &MCDwarfDirs =
+ getContext().getMCDwarfDirs();
+ for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+ EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
+ EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
+ }
+ EmitIntValue(0, 1); // Terminate the directory list
+
+ // Second the file table.
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
+ EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
+ // FIXME the Directory number should be a .uleb128 not a .byte
+ EmitIntValue(MCDwarfFiles[i]->getDirIndex(), 1);
+ EmitIntValue(0, 1); // last modification timestamp (always 0)
+ EmitIntValue(0, 1); // filesize (always 0)
+ }
+ EmitIntValue(0, 1); // Terminate the file list
+
+ // This is the end of the prologue, so set the value of the symbol at the
+ // end of the prologue (that was used in a previous expression).
+ EmitLabel(ProEndSym);
+
+ // TODO: This is the point where the line tables would be emitted.
+
+ // Delete the MCLineSections that were created in
+ // MCMachOStreamer::MakeLineEntryForSection() and used to emit the line
+ // tables.
+ DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ getContext().getMCLineSections();
+ for (DenseMap<const MCSection *, MCLineSection *>::iterator it =
+ MCLineSections.begin(), ie = MCLineSections.end(); it != ie; ++it) {
+ delete it->second;
+ }
+
+ // If there are no line tables emited then we emit:
+ // The following DW_LNE_set_address sequence to set the address to zero
+ // TODO test for 32-bit or 64-bit output
+ // This is the sequence for 32-bit code
+ EmitIntValue(0, 1);
+ EmitIntValue(5, 1);
+ EmitIntValue(2, 1);
+ EmitIntValue(0, 1);
+ EmitIntValue(0, 1);
+ EmitIntValue(0, 1);
+ EmitIntValue(0, 1);
+
+ // Lastly emit the DW_LNE_end_sequence which consists of 3 bytes '00 01 01'
+ // (00 is the code for extended opcodes, followed by a ULEB128 length of the
+ // extended opcode (01), and the DW_LNE_end_sequence (01).
+ EmitIntValue(0, 1); // DW_LNS_extended_op
+ EmitIntValue(1, 1); // ULEB128 length of the extended opcode
+ EmitIntValue(1, 1); // DW_LNE_end_sequence
+
+ // This is the end of the section, so set the value of the symbol at the end
+ // of this section (that was used in a previous expression).
+ EmitLabel(LineEndSym);
+}
+
void MCMachOStreamer::Finish() {
+ // Dump out the dwarf file and directory tables (soon to include line table)
+ EmitDwarfFileTable();
+
// We have to set the fragment atom associations so we can relax properly for
// Mach-O.
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 5332ade21153..f7a2f20ca4bc 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -26,6 +26,7 @@ namespace {
/// @{
virtual void SwitchSection(const MCSection *Section) {
+ PrevSection = CurSection;
CurSection = Section;
}
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index d3f7f7783ffa..2b2385ef9156 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -9,7 +9,11 @@
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
@@ -21,15 +25,59 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
}
MCObjectStreamer::~MCObjectStreamer() {
+ delete &Assembler->getBackend();
+ delete &Assembler->getEmitter();
delete Assembler;
}
+MCFragment *MCObjectStreamer::getCurrentFragment() const {
+ assert(getCurrentSectionData() && "No current section!");
+
+ if (!getCurrentSectionData()->empty())
+ return &getCurrentSectionData()->getFragmentList().back();
+
+ return 0;
+}
+
+MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
+ MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!F)
+ F = new MCDataFragment(getCurrentSectionData());
+ return F;
+}
+
+const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols(BE->getLHS());
+ AddValueSymbols(BE->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+ break;
+ }
+
+ return Value;
+}
+
void MCObjectStreamer::SwitchSection(const MCSection *Section) {
assert(Section && "Cannot switch to a null section!");
// If already in this section, then this is a noop.
if (Section == CurSection) return;
+ PrevSection = CurSection;
CurSection = Section;
CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
}
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 465d98382877..086df081a938 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -117,6 +117,13 @@ AsmToken AsmLexer::LexLineComment() {
return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
}
+static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
+ if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
+ CurPtr += 2;
+ if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
+ CurPtr += 3;
+}
+
/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
@@ -133,7 +140,7 @@ AsmToken AsmLexer::LexDigit() {
++CurPtr;
StringRef Result(TokStart, CurPtr - TokStart);
-
+
long long Value;
if (Result.getAsInteger(10, Value)) {
// We have to handle minint_as_a_positive_value specially, because
@@ -143,6 +150,11 @@ AsmToken AsmLexer::LexDigit() {
else
return ReturnError(TokStart, "Invalid decimal number");
}
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, Result, Value);
}
@@ -165,9 +177,13 @@ AsmToken AsmLexer::LexDigit() {
StringRef Result(TokStart, CurPtr - TokStart);
long long Value;
- if (Result.getAsInteger(2, Value))
+ if (Result.substr(2).getAsInteger(2, Value))
return ReturnError(TokStart, "Invalid binary number");
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, Result, Value);
}
@@ -185,6 +201,10 @@ AsmToken AsmLexer::LexDigit() {
if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
return ReturnError(TokStart, "Invalid hexadecimal number");
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
(int64_t)Result);
}
@@ -198,6 +218,10 @@ AsmToken AsmLexer::LexDigit() {
if (Result.getAsInteger(8, Value))
return ReturnError(TokStart, "Invalid octal number");
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, Result, Value);
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index e0949bd2856f..f83cd5eb2a16 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -11,47 +11,237 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/AsmCond.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmParser.h"
+#include <vector>
using namespace llvm;
namespace {
+/// \brief Helper class for tracking macro definitions.
+struct Macro {
+ StringRef Name;
+ StringRef Body;
+
+public:
+ Macro(StringRef N, StringRef B) : Name(N), Body(B) {}
+};
+
+/// \brief Helper class for storing information about an active macro
+/// instantiation.
+struct MacroInstantiation {
+ /// The macro being instantiated.
+ const Macro *TheMacro;
+
+ /// The macro instantiation with substitutions.
+ MemoryBuffer *Instantiation;
+
+ /// The location of the instantiation.
+ SMLoc InstantiationLoc;
+
+ /// The location where parsing should resume upon instantiation completion.
+ SMLoc ExitLoc;
+
+public:
+ MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ const std::vector<std::vector<AsmToken> > &A);
+};
+
+/// \brief The concrete assembly parser instance.
+class AsmParser : public MCAsmParser {
+ friend class GenericAsmParser;
+
+ AsmParser(const AsmParser &); // DO NOT IMPLEMENT
+ void operator=(const AsmParser &); // DO NOT IMPLEMENT
+private:
+ AsmLexer Lexer;
+ MCContext &Ctx;
+ MCStreamer &Out;
+ SourceMgr &SrcMgr;
+ MCAsmParserExtension *GenericParser;
+ MCAsmParserExtension *PlatformParser;
+
+ /// This is the current buffer index we're lexing from as managed by the
+ /// SourceMgr object.
+ int CurBuffer;
+
+ AsmCond TheCondState;
+ std::vector<AsmCond> TheCondStack;
+
+ /// DirectiveMap - This is a table handlers for directives. Each handler is
+ /// invoked after the directive identifier is read and is responsible for
+ /// parsing and validating the rest of the directive. The handler is passed
+ /// in the directive name and the location of the directive keyword.
+ StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
+
+ /// MacroMap - Map of currently defined macros.
+ StringMap<Macro*> MacroMap;
+
+ /// ActiveMacros - Stack of active macro instantiations.
+ std::vector<MacroInstantiation*> ActiveMacros;
+
+ /// Boolean tracking whether macro substitution is enabled.
+ unsigned MacrosEnabled : 1;
+
+public:
+ AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ const MCAsmInfo &MAI);
+ ~AsmParser();
+
+ virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
+
+ void AddDirectiveHandler(MCAsmParserExtension *Object,
+ StringRef Directive,
+ DirectiveHandler Handler) {
+ DirectiveMap[Directive] = std::make_pair(Object, Handler);
+ }
+
+public:
+ /// @name MCAsmParser Interface
+ /// {
+
+ virtual SourceMgr &getSourceManager() { return SrcMgr; }
+ virtual MCAsmLexer &getLexer() { return Lexer; }
+ virtual MCContext &getContext() { return Ctx; }
+ virtual MCStreamer &getStreamer() { return Out; }
+
+ virtual void Warning(SMLoc L, const Twine &Meg);
+ virtual bool Error(SMLoc L, const Twine &Msg);
+
+ const AsmToken &Lex();
+
+ bool ParseExpression(const MCExpr *&Res);
+ virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseAbsoluteExpression(int64_t &Res);
+
+ /// }
+
+private:
+ bool ParseStatement();
+
+ bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
+ void HandleMacroExit();
+
+ void PrintMacroInstantiations();
+ void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
+
+ /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+ bool EnterIncludeFile(const std::string &Filename);
+
+ /// \brief Reset the current lexer position to that given by \arg Loc. The
+ /// current token is not set; clients should ensure Lex() is called
+ /// subsequently.
+ void JumpToLoc(SMLoc Loc);
+
+ void EatToEndOfStatement();
+
+ /// \brief Parse up to the end of statement and a return the contents from the
+ /// current token until the end of the statement; the current token on exit
+ /// will be either the EndOfStatement or EOF.
+ StringRef ParseStringToEndOfStatement();
+
+ bool ParseAssignment(StringRef Name);
+
+ bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
+ /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// and set \arg Res to the identifier contents.
+ bool ParseIdentifier(StringRef &Res);
+
+ // Directive Parsing.
+ bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
+ bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+ bool ParseDirectiveFill(); // ".fill"
+ bool ParseDirectiveSpace(); // ".space"
+ bool ParseDirectiveSet(); // ".set"
+ bool ParseDirectiveOrg(); // ".org"
+ // ".align{,32}", ".p2align{,w,l}"
+ bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+
+ /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+ /// accepts a single symbol (which should be a label or an external).
+ bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+ bool ParseDirectiveELFType(); // ELF specific ".type"
+
+ bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+
+ bool ParseDirectiveAbort(); // ".abort"
+ bool ParseDirectiveInclude(); // ".include"
+
+ bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+ bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+ bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+
+ /// ParseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ bool ParseEscapedString(std::string &Data);
+};
+
/// \brief Generic implementations of directive handling, etc. which is shared
/// (or the default, at least) for all assembler parser.
class GenericAsmParser : public MCAsmParserExtension {
+ template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<GenericAsmParser, Handler>);
+ }
+
public:
GenericAsmParser() {}
+ AsmParser &getParser() {
+ return (AsmParser&) this->MCAsmParserExtension::getParser();
+ }
+
virtual void Initialize(MCAsmParser &Parser) {
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
// Debugging directives.
- Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler(
- &GenericAsmParser::ParseDirectiveFile));
- Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler(
- &GenericAsmParser::ParseDirectiveLine));
- Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler(
- &GenericAsmParser::ParseDirectiveLoc));
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+
+ // Macro directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_on");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_off");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
}
- bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file"
- bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line"
- bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc"
+ bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
};
}
@@ -69,7 +259,7 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
- TargetParser(0), CurBuffer(0) {
+ CurBuffer(0), MacrosEnabled(true) {
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
// Initialize the generic parser.
@@ -89,22 +279,33 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
}
AsmParser::~AsmParser() {
+ assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
+
+ // Destroy any macros.
+ for (StringMap<Macro*>::iterator it = MacroMap.begin(),
+ ie = MacroMap.end(); it != ie; ++it)
+ delete it->getValue();
+
delete PlatformParser;
delete GenericParser;
}
-void AsmParser::setTargetParser(TargetAsmParser &P) {
- assert(!TargetParser && "Target parser is already initialized!");
- TargetParser = &P;
- TargetParser->Initialize(*this);
+void AsmParser::PrintMacroInstantiations() {
+ // Print the active macro instantiation stack.
+ for (std::vector<MacroInstantiation*>::const_reverse_iterator
+ it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
+ PrintMessage((*it)->InstantiationLoc, "while in macro instantiation",
+ "note");
}
void AsmParser::Warning(SMLoc L, const Twine &Msg) {
PrintMessage(L, Msg.str(), "warning");
+ PrintMacroInstantiations();
}
bool AsmParser::Error(SMLoc L, const Twine &Msg) {
PrintMessage(L, Msg.str(), "error");
+ PrintMacroInstantiations();
return true;
}
@@ -124,7 +325,12 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) {
return false;
}
-
+
+void AsmParser::JumpToLoc(SMLoc Loc) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
+}
+
const AsmToken &AsmParser::Lex() {
const AsmToken *tok = &Lexer.Lex();
@@ -133,15 +339,13 @@ const AsmToken &AsmParser::Lex() {
// include stack.
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
- CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
- Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer),
- ParentIncludeLoc.getPointer());
+ JumpToLoc(ParentIncludeLoc);
tok = &Lexer.Lex();
}
}
if (tok->is(AsmToken::Error))
- PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error");
+ Error(Lexer.getErrLoc(), Lexer.getErr());
return *tok;
}
@@ -174,6 +378,16 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
if (TheCondState.TheCond != StartingCondState.TheCond ||
TheCondState.Ignore != StartingCondState.Ignore)
return TokError("unmatched .ifs or .elses");
+
+ // Check to see there are no empty DwarfFile slots.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ if (!MCDwarfFiles[i]){
+ TokError("unassigned file number: " + Twine(i) + " for .file directives");
+ HadError = true;
+ }
+ }
// Finalize the output stream if there are no errors and if the client wants
// us to.
@@ -194,6 +408,16 @@ void AsmParser::EatToEndOfStatement() {
Lex();
}
+StringRef AsmParser::ParseStringToEndOfStatement() {
+ const char *Start = getTok().getLoc().getPointer();
+
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ const char *End = getTok().getLoc().getPointer();
+ return StringRef(Start, End - Start);
+}
/// ParseParenExpr - Parse a paren expression and return it.
/// NOTE: This assumes the leading '(' has already been consumed.
@@ -225,10 +449,17 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return true;
Res = MCUnaryExpr::CreateLNot(Res, getContext());
return false;
+ case AsmToken::Dollar:
case AsmToken::String:
case AsmToken::Identifier: {
+ EndLoc = Lexer.getLoc();
+
+ StringRef Identifier;
+ if (ParseIdentifier(Identifier))
+ return false;
+
// This is a symbol reference.
- std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
+ std::pair<StringRef, StringRef> Split = Identifier.split('@');
MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
// Mark the symbol as used in an expression.
@@ -236,12 +467,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// Lookup the symbol variant if used.
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- if (Split.first.size() != getTok().getIdentifier().size())
+ if (Split.first.size() != Identifier.size())
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
- EndLoc = Lexer.getLoc();
- Lex(); // Eat identifier.
-
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
@@ -568,7 +796,12 @@ bool AsmParser::ParseStatement() {
default: // Normal instruction or directive.
break;
}
-
+
+ // If macros are enabled, check to see if this is a macro instantiation.
+ if (MacrosEnabled)
+ if (const Macro *M = MacroMap.lookup(IDVal))
+ return HandleMacroEntry(IDVal, IDLoc, M);
+
// Otherwise, we have a normal instruction or directive.
if (IDVal[0] == '.') {
// Assembler features
@@ -591,11 +824,14 @@ bool AsmParser::ParseStatement() {
if (IDVal == ".quad")
return ParseDirectiveValue(8);
- // FIXME: Target hooks for IsPow2.
- if (IDVal == ".align")
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
- if (IDVal == ".align32")
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
+ if (IDVal == ".align") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
+ }
+ if (IDVal == ".align32") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
+ }
if (IDVal == ".balign")
return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
if (IDVal == ".balignw")
@@ -662,7 +898,7 @@ bool AsmParser::ParseStatement() {
std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
DirectiveMap.lookup(IDVal);
if (Handler.first)
- return (Handler.first->*Handler.second)(IDVal, IDLoc);
+ return (*Handler.second)(Handler.first, IDVal, IDLoc);
// Target hook for parsing target specific directives.
if (!getTargetParser().ParseDirective(ID))
@@ -684,20 +920,29 @@ bool AsmParser::ParseStatement() {
if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
HadError = TokError("unexpected token in argument list");
+ // Dump the parsed representation, if requested.
+ if (getShowParsedOperands()) {
+ SmallString<256> Str;
+ raw_svector_ostream OS(Str);
+ OS << "parsed instruction: [";
+ for (unsigned i = 0; i != ParsedOperands.size(); ++i) {
+ if (i != 0)
+ OS << ", ";
+ ParsedOperands[i]->dump(OS);
+ }
+ OS << "]";
+
+ PrintMessage(IDLoc, OS.str(), "note");
+ }
+
// If parsing succeeded, match the instruction.
if (!HadError) {
MCInst Inst;
- if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) {
+ if (!getTargetParser().MatchInstruction(IDLoc, ParsedOperands, Inst)) {
// Emit the instruction on success.
Out.EmitInstruction(Inst);
- } else {
- // Otherwise emit a diagnostic about the match failure and set the error
- // flag.
- //
- // FIXME: We should give nicer diagnostics about the exact failure.
- Error(IDLoc, "unrecognized instruction");
+ } else
HadError = true;
- }
}
// If there was no error, consume the end-of-statement token. Otherwise this
@@ -712,6 +957,132 @@ bool AsmParser::ParseStatement() {
return HadError;
}
+MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ const std::vector<std::vector<AsmToken> > &A)
+ : TheMacro(M), InstantiationLoc(IL), ExitLoc(EL)
+{
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ raw_svector_ostream OS(Buf);
+
+ StringRef Body = M->Body;
+ while (!Body.empty()) {
+ // Scan for the next substitution.
+ std::size_t End = Body.size(), Pos = 0;
+ for (; Pos != End; ++Pos) {
+ // Check for a substitution or escape.
+ if (Body[Pos] != '$' || Pos + 1 == End)
+ continue;
+
+ char Next = Body[Pos + 1];
+ if (Next == '$' || Next == 'n' || isdigit(Next))
+ break;
+ }
+
+ // Add the prefix.
+ OS << Body.slice(0, Pos);
+
+ // Check if we reached the end.
+ if (Pos == End)
+ break;
+
+ switch (Body[Pos+1]) {
+ // $$ => $
+ case '$':
+ OS << '$';
+ break;
+
+ // $n => number of arguments
+ case 'n':
+ OS << A.size();
+ break;
+
+ // $[0-9] => argument
+ default: {
+ // Missing arguments are ignored.
+ unsigned Index = Body[Pos+1] - '0';
+ if (Index >= A.size())
+ break;
+
+ // Otherwise substitute with the token values, with spaces eliminated.
+ for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ OS << it->getString();
+ break;
+ }
+ }
+
+ // Update the scan point.
+ Body = Body.substr(Pos + 2);
+ }
+
+ // We include the .endmacro in the buffer as our queue to exit the macro
+ // instantiation.
+ OS << ".endmacro\n";
+
+ Instantiation = MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+}
+
+bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
+ const Macro *M) {
+ // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
+ // this, although we should protect against infinite loops.
+ if (ActiveMacros.size() == 20)
+ return TokError("macros cannot be nested more than 20 levels deep");
+
+ // Parse the macro instantiation arguments.
+ std::vector<std::vector<AsmToken> > MacroArguments;
+ MacroArguments.push_back(std::vector<AsmToken>());
+ unsigned ParenLevel = 0;
+ for (;;) {
+ if (Lexer.is(AsmToken::Eof))
+ return TokError("unexpected token in macro instantiation");
+ if (Lexer.is(AsmToken::EndOfStatement))
+ break;
+
+ // If we aren't inside parentheses and this is a comma, start a new token
+ // list.
+ if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+ MacroArguments.push_back(std::vector<AsmToken>());
+ } else {
+ // Adjust the current parentheses level.
+ if (Lexer.is(AsmToken::LParen))
+ ++ParenLevel;
+ else if (Lexer.is(AsmToken::RParen) && ParenLevel)
+ --ParenLevel;
+
+ // Append the token to the current argument list.
+ MacroArguments.back().push_back(getTok());
+ }
+ Lex();
+ }
+
+ // Create the macro instantiation object and add to the current macro
+ // instantiation stack.
+ MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+ getTok().getLoc(),
+ MacroArguments);
+ ActiveMacros.push_back(MI);
+
+ // Jump to the macro instantiation and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lex();
+
+ return false;
+}
+
+void AsmParser::HandleMacroExit() {
+ // Jump to the EndOfStatement we should return to, and consume it.
+ JumpToLoc(ActiveMacros.back()->ExitLoc);
+ Lex();
+
+ // Pop the instantiation entry.
+ delete ActiveMacros.back();
+ ActiveMacros.pop_back();
+}
+
bool AsmParser::ParseAssignment(StringRef Name) {
// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
@@ -760,6 +1131,30 @@ bool AsmParser::ParseAssignment(StringRef Name) {
/// ::= identifier
/// ::= string
bool AsmParser::ParseIdentifier(StringRef &Res) {
+ // The assembler has relaxed rules for accepting identifiers, in particular we
+ // allow things like '.globl $foo', which would normally be separate
+ // tokens. At this level, we have already lexed so we cannot (currently)
+ // handle this as a context dependent token, instead we detect adjacent tokens
+ // and return the combined identifier.
+ if (Lexer.is(AsmToken::Dollar)) {
+ SMLoc DollarLoc = getLexer().getLoc();
+
+ // Consume the dollar sign, and check for a following identifier.
+ Lex();
+ if (Lexer.isNot(AsmToken::Identifier))
+ return true;
+
+ // We have a '$' followed by an identifier, make sure they are adjacent.
+ if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ return true;
+
+ // Construct the joined identifier and consume the token.
+ Res = StringRef(DollarLoc.getPointer(),
+ getTok().getIdentifier().size() + 1);
+ Lex();
+ return false;
+ }
+
if (Lexer.isNot(AsmToken::Identifier) &&
Lexer.isNot(AsmToken::String))
return true;
@@ -1081,13 +1476,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
bool UseCodeAlign = false;
if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
getStreamer().getCurrentSection()))
- UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
- getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+ getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
+ MaxBytesToFill);
}
return false;
@@ -1238,31 +1634,22 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
}
/// ParseDirectiveAbort
-/// ::= .abort [ "abort_string" ]
+/// ::= .abort [... message ...]
bool AsmParser::ParseDirectiveAbort() {
// FIXME: Use loc from directive.
SMLoc Loc = getLexer().getLoc();
- StringRef Str = "";
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (getLexer().isNot(AsmToken::String))
- return TokError("expected string in '.abort' directive");
-
- Str = getTok().getString();
-
- Lex();
- }
-
+ StringRef Str = ParseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.abort' directive");
-
+
Lex();
- // FIXME: Handle here.
if (Str.empty())
Error(Loc, ".abort detected. Assembly stopping.");
else
Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+ // FIXME: Actually abort assembly here.
return false;
}
@@ -1286,9 +1673,7 @@ bool AsmParser::ParseDirectiveInclude() {
// Attempt to switch the lexer to the included file before consuming the end
// of statement to avoid losing it when we switch.
if (EnterIncludeFile(Filename)) {
- PrintMessage(IncludeLoc,
- "Could not find include file '" + Filename + "'",
- "error");
+ Error(IncludeLoc, "Could not find include file '" + Filename + "'");
return true;
}
@@ -1401,6 +1786,7 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
+ SMLoc FileNumberLoc = getLexer().getLoc();
if (getLexer().is(AsmToken::Integer)) {
FileNumber = getTok().getIntVal();
Lex();
@@ -1421,8 +1807,11 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
- else
+ else {
+ if (getContext().GetDwarfFile(Filename, FileNumber) == 0)
+ Error(FileNumberLoc, "file number already allocated");
getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
+ }
return false;
}
@@ -1449,40 +1838,193 @@ bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
/// ParseDirectiveLoc
-/// ::= .loc number [number [number]]
+/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
+/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
+/// The first number is a file number, must have been previously assigned with
+/// a .file directive, the second number is the line number and optionally the
+/// third number is a column position (zero if not specified). The remaining
+/// optional items are .loc sub-directives.
bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
+
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
-
- // FIXME: What are these fields?
int64_t FileNumber = getTok().getIntVal();
- (void) FileNumber;
- // FIXME: Validate file.
-
+ if (FileNumber < 1)
+ return TokError("file number less than one in '.loc' directive");
+ if (!getContext().ValidateDwarfFileNumber(FileNumber))
+ return TokError("unassigned file number in '.loc' directive");
Lex();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (getLexer().isNot(AsmToken::Integer))
- return TokError("unexpected token in '.loc' directive");
- int64_t Param2 = getTok().getIntVal();
- (void) Param2;
+ int64_t LineNumber = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ LineNumber = getTok().getIntVal();
+ if (LineNumber < 1)
+ return TokError("line number less than one in '.loc' directive");
Lex();
+ }
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (getLexer().isNot(AsmToken::Integer))
+ int64_t ColumnPos = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ ColumnPos = getTok().getIntVal();
+ if (ColumnPos < 0)
+ return TokError("column position less than zero in '.loc' directive");
+ Lex();
+ }
+
+ unsigned Flags = 0;
+ unsigned Isa = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (getParser().ParseIdentifier(Name))
return TokError("unexpected token in '.loc' directive");
- int64_t Param3 = getTok().getIntVal();
- (void) Param3;
- Lex();
+ if (Name == "basic_block")
+ Flags |= DWARF2_FLAG_BASIC_BLOCK;
+ else if (Name == "prologue_end")
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ else if (Name == "epilogue_begin")
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ else if (Name == "is_stmt") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be the constant 0 or 1.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value == 0)
+ Flags &= ~DWARF2_FLAG_IS_STMT;
+ else if (Value == 1)
+ Flags |= DWARF2_FLAG_IS_STMT;
+ else
+ return Error(Loc, "is_stmt value not 0 or 1");
+ }
+ else {
+ return Error(Loc, "is_stmt value not the constant value of 0 or 1");
+ }
+ }
+ else if (Name == "isa") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be a constant greater or equal to 0.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value < 0)
+ return Error(Loc, "isa number less than zero");
+ Isa = Value;
+ }
+ else {
+ return Error(Loc, "isa number not a constant value");
+ }
+ }
+ else {
+ return Error(Loc, "unknown sub-directive in '.loc' directive");
+ }
- // FIXME: Do something with the .loc.
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
}
}
+ getContext().setCurrentDwarfLoc(FileNumber, LineNumber, ColumnPos, Flags,Isa);
+
+ return false;
+}
+
+/// ParseDirectiveMacrosOnOff
+/// ::= .macros_on
+/// ::= .macros_off
+bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive,
+ SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.file' directive");
+ return Error(getLexer().getLoc(),
+ "unexpected token in '" + Directive + "' directive");
+
+ getParser().MacrosEnabled = Directive == ".macros_on";
return false;
}
+/// ParseDirectiveMacro
+/// ::= .macro name
+bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.macro' directive");
+
+ // Eat the end of statement.
+ Lex();
+
+ AsmToken EndToken, StartToken = getTok();
+
+ // Lex the macro definition.
+ for (;;) {
+ // Check whether we have reached the end of the file.
+ if (getLexer().is(AsmToken::Eof))
+ return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+
+ // Otherwise, check whether we have reach the .endmacro.
+ if (getLexer().is(AsmToken::Identifier) &&
+ (getTok().getIdentifier() == ".endm" ||
+ getTok().getIdentifier() == ".endmacro")) {
+ EndToken = getTok();
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + EndToken.getIdentifier() +
+ "' directive");
+ break;
+ }
+
+ // Otherwise, scan til the end of the statement.
+ getParser().EatToEndOfStatement();
+ }
+
+ if (getParser().MacroMap.lookup(Name)) {
+ return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+ }
+
+ const char *BodyStart = StartToken.getLoc().getPointer();
+ const char *BodyEnd = EndToken.getLoc().getPointer();
+ StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+ getParser().MacroMap[Name] = new Macro(Name, Body);
+ return false;
+}
+
+/// ParseDirectiveEndMacro
+/// ::= .endm
+/// ::= .endmacro
+bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Directive + "' directive");
+
+ // If we are inside a macro instantiation, terminate the current
+ // instantiation.
+ if (!getParser().ActiveMacros.empty()) {
+ getParser().HandleMacroExit();
+ return false;
+ }
+
+ // Otherwise, this .endmacro is a stray entry in the file; well formed
+ // .endmacro directives are handled during the macro definition parsing.
+ return TokError("unexpected '" + Directive + "' in file, "
+ "no current macro definition");
+}
+
+/// \brief Create an MCAsmParser instance.
+MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
+ MCContext &C, MCStreamer &Out,
+ const MCAsmInfo &MAI) {
+ return new AsmParser(T, SM, C, Out, MAI);
+}
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 7d8639ea4d81..54ddb449b285 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -25,6 +25,12 @@ namespace {
/// \brief Implementation of directive handling which is shared across all
/// Darwin targets.
class DarwinAsmParser : public MCAsmParserExtension {
+ template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<DarwinAsmParser, Handler>);
+ }
+
bool ParseSectionSwitch(const char *Segment, const char *Section,
unsigned TAA = 0, unsigned ImplicitAlign = 0,
unsigned StubSize = 0);
@@ -36,168 +42,70 @@ public:
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
- Parser.AddDirectiveHandler(this, ".desc", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveDesc));
- Parser.AddDirectiveHandler(this, ".lsym", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveLsym));
- Parser.AddDirectiveHandler(this, ".subsections_via_symbols",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols));
- Parser.AddDirectiveHandler(this, ".dump", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveDumpOrLoad));
- Parser.AddDirectiveHandler(this, ".load", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveDumpOrLoad));
- Parser.AddDirectiveHandler(this, ".section", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSection));
- Parser.AddDirectiveHandler(this, ".secure_log_unique",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSecureLogUnique));
- Parser.AddDirectiveHandler(this, ".secure_log_reset",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSecureLogReset));
- Parser.AddDirectiveHandler(this, ".tbss",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveTBSS));
- Parser.AddDirectiveHandler(this, ".zerofill",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveZerofill));
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
+ ".subsections_via_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
+ ".secure_log_unique");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
+ ".secure_log_reset");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
// Special section directives.
- Parser.AddDirectiveHandler(this, ".const",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveConst));
- Parser.AddDirectiveHandler(this, ".const_data",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveConstData));
- Parser.AddDirectiveHandler(this, ".constructor",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveConstructor));
- Parser.AddDirectiveHandler(this, ".cstring",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveCString));
- Parser.AddDirectiveHandler(this, ".data",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveData));
- Parser.AddDirectiveHandler(this, ".destructor",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveDestructor));
- Parser.AddDirectiveHandler(this, ".dyld",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveDyld));
- Parser.AddDirectiveHandler(this, ".fvmlib_init0",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveFVMLibInit0));
- Parser.AddDirectiveHandler(this, ".fvmlib_init1",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveFVMLibInit1));
- Parser.AddDirectiveHandler(this, ".lazy_symbol_pointer",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers));
- Parser.AddDirectiveHandler(this, ".literal16",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLiteral16));
- Parser.AddDirectiveHandler(this, ".literal4",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLiteral4));
- Parser.AddDirectiveHandler(this, ".literal8",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLiteral8));
- Parser.AddDirectiveHandler(this, ".mod_init_func",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveModInitFunc));
- Parser.AddDirectiveHandler(this, ".mod_term_func",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveModTermFunc));
- Parser.AddDirectiveHandler(this, ".non_lazy_symbol_pointer",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers));
- Parser.AddDirectiveHandler(this, ".objc_cat_cls_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth));
- Parser.AddDirectiveHandler(this, ".objc_cat_inst_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth));
- Parser.AddDirectiveHandler(this, ".objc_category",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCCategory));
- Parser.AddDirectiveHandler(this, ".objc_class",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClass));
- Parser.AddDirectiveHandler(this, ".objc_class_names",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClassNames));
- Parser.AddDirectiveHandler(this, ".objc_class_vars",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClassVars));
- Parser.AddDirectiveHandler(this, ".objc_cls_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClsMeth));
- Parser.AddDirectiveHandler(this, ".objc_cls_refs",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClsRefs));
- Parser.AddDirectiveHandler(this, ".objc_inst_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCInstMeth));
- Parser.AddDirectiveHandler(this, ".objc_instance_vars",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars));
- Parser.AddDirectiveHandler(this, ".objc_message_refs",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs));
- Parser.AddDirectiveHandler(this, ".objc_meta_class",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMetaClass));
- Parser.AddDirectiveHandler(this, ".objc_meth_var_names",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames));
- Parser.AddDirectiveHandler(this, ".objc_meth_var_types",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes));
- Parser.AddDirectiveHandler(this, ".objc_module_info",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo));
- Parser.AddDirectiveHandler(this, ".objc_protocol",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCProtocol));
- Parser.AddDirectiveHandler(this, ".objc_selector_strs",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs));
- Parser.AddDirectiveHandler(this, ".objc_string_object",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCStringObject));
- Parser.AddDirectiveHandler(this, ".objc_symbols",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCSymbols));
- Parser.AddDirectiveHandler(this, ".picsymbol_stub",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectivePICSymbolStub));
- Parser.AddDirectiveHandler(this, ".static_const",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveStaticConst));
- Parser.AddDirectiveHandler(this, ".static_data",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveStaticData));
- Parser.AddDirectiveHandler(this, ".symbol_stub",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveSymbolStub));
- Parser.AddDirectiveHandler(this, ".tdata",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveTData));
- Parser.AddDirectiveHandler(this, ".text",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveText));
- Parser.AddDirectiveHandler(this, ".thread_init_func",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveThreadInitFunc));
- Parser.AddDirectiveHandler(this, ".tlv",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveTLV));
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
}
bool ParseDirectiveDesc(StringRef, SMLoc);
bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
bool ParseDirectiveLsym(StringRef, SMLoc);
- bool ParseDirectiveSection();
+ bool ParseDirectiveSection(StringRef, SMLoc);
bool ParseDirectiveSecureLogReset(StringRef, SMLoc);
bool ParseDirectiveSecureLogUnique(StringRef, SMLoc);
bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
@@ -493,7 +401,7 @@ bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
/// ParseDirectiveSection:
/// ::= .section identifier (',' identifier)*
-bool DarwinAsmParser::ParseDirectiveSection() {
+bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
SMLoc Loc = getLexer().getLoc();
StringRef SectionName;
@@ -537,28 +445,22 @@ bool DarwinAsmParser::ParseDirectiveSection() {
}
/// ParseDirectiveSecureLogUnique
-/// ::= .secure_log_unique "log message"
+/// ::= .secure_log_unique ... message ...
bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
- std::string LogMessage;
-
- if (getLexer().isNot(AsmToken::String))
- LogMessage = "";
- else{
- LogMessage = getTok().getString();
- Lex();
- }
-
+ StringRef LogMessage = getParser().ParseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.secure_log_unique' directive");
if (getContext().getSecureLogUsed() != false)
return Error(IDLoc, ".secure_log_unique specified multiple times");
- char *SecureLogFile = getContext().getSecureLogFile();
+ // Get the secure log path.
+ const char *SecureLogFile = getContext().getSecureLogFile();
if (SecureLogFile == NULL)
return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
"environment variable unset.");
+ // Open the secure log file if we haven't already.
raw_ostream *OS = getContext().getSecureLog();
if (OS == NULL) {
std::string Err;
@@ -571,6 +473,7 @@ bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
getContext().setSecureLog(OS);
}
+ // Write the message.
int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
*OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
<< ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 7a54dd39aa47..f982fdaecb12 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -8,15 +8,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/Twine.h"
using namespace llvm;
namespace {
class ELFAsmParser : public MCAsmParserExtension {
+ template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<ELFAsmParser, Handler>);
+ }
+
bool ParseSectionSwitch(StringRef Section, unsigned Type,
unsigned Flags, SectionKind Kind);
@@ -27,10 +36,21 @@ public:
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
- Parser.AddDirectiveHandler(this, ".data", MCAsmParser::DirectiveHandler(
- &ELFAsmParser::ParseSectionDirectiveData));
- Parser.AddDirectiveHandler(this, ".text", MCAsmParser::DirectiveHandler(
- &ELFAsmParser::ParseSectionDirectiveText));
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".sleb128");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".uleb128");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
}
bool ParseSectionDirectiveData(StringRef, SMLoc) {
@@ -43,6 +63,56 @@ public:
MCSectionELF::SHF_EXECINSTR |
MCSectionELF::SHF_ALLOC, SectionKind::getText());
}
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE |
+ MCSectionELF::SHF_ALLOC, SectionKind::getBSS());
+ }
+ bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".rodata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+ SectionKind::getThreadData());
+ }
+ bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+ }
+ bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+ }
+ bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+ }
+ bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
+ return ParseSectionSwitch(".eh_frame", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseDirectiveLEB128(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectiveSize(StringRef, SMLoc);
+ bool ParseDirectivePrevious(StringRef, SMLoc);
};
}
@@ -59,6 +129,159 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
return false;
}
+bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getStreamer().EmitELFSize(Sym, Expr);
+ return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ StringRef SectionName;
+ // FIXME: This doesn't parse section names like ".note.GNU-stack" correctly.
+ if (getParser().ParseIdentifier(SectionName))
+ return TokError("expected identifier in directive");
+
+ std::string FlagsStr;
+ StringRef TypeName;
+ int64_t Size = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in directive");
+
+ FlagsStr = getTok().getStringContents();
+ Lex();
+
+ AsmToken::TokenKind TypeStartToken;
+ if (getContext().getAsmInfo().getCommentString()[0] == '@')
+ TypeStartToken = AsmToken::Percent;
+ else
+ TypeStartToken = AsmToken::At;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (getLexer().is(TypeStartToken)) {
+ Lex();
+ if (getParser().ParseIdentifier(TypeName))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ if (Size <= 0)
+ return TokError("section size must be positive");
+ }
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ unsigned Flags = 0;
+ for (unsigned i = 0; i < FlagsStr.size(); i++) {
+ switch (FlagsStr[i]) {
+ case 'a':
+ Flags |= MCSectionELF::SHF_ALLOC;
+ break;
+ case 'x':
+ Flags |= MCSectionELF::SHF_EXECINSTR;
+ break;
+ case 'w':
+ Flags |= MCSectionELF::SHF_WRITE;
+ break;
+ case 'M':
+ Flags |= MCSectionELF::SHF_MERGE;
+ break;
+ case 'S':
+ Flags |= MCSectionELF::SHF_STRINGS;
+ break;
+ case 'T':
+ Flags |= MCSectionELF::SHF_TLS;
+ break;
+ case 'c':
+ Flags |= MCSectionELF::XCORE_SHF_CP_SECTION;
+ break;
+ case 'd':
+ Flags |= MCSectionELF::XCORE_SHF_DP_SECTION;
+ break;
+ default:
+ return TokError("unknown flag");
+ }
+ }
+
+ unsigned Type = MCSectionELF::SHT_NULL;
+ if (!TypeName.empty()) {
+ if (TypeName == "init_array")
+ Type = MCSectionELF::SHT_INIT_ARRAY;
+ else if (TypeName == "fini_array")
+ Type = MCSectionELF::SHT_FINI_ARRAY;
+ else if (TypeName == "preinit_array")
+ Type = MCSectionELF::SHT_PREINIT_ARRAY;
+ else if (TypeName == "nobits")
+ Type = MCSectionELF::SHT_NOBITS;
+ else if (TypeName == "progbits")
+ Type = MCSectionELF::SHT_PROGBITS;
+ else
+ return TokError("unknown section type");
+ }
+
+ SectionKind Kind = (Flags & MCSectionELF::SHF_EXECINSTR)
+ ? SectionKind::getText()
+ : SectionKind::getDataRel();
+ getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
+ Flags, Kind, false));
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
+ int64_t Value;
+ if (getParser().ParseAbsoluteExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ // FIXME: Add proper MC support.
+ if (getContext().getAsmInfo().hasLEB128()) {
+ if (DirName[1] == 's')
+ getStreamer().EmitRawText("\t.sleb128\t" + Twine(Value));
+ else
+ getStreamer().EmitRawText("\t.uleb128\t" + Twine(Value));
+ return false;
+ }
+ // FIXME: This shouldn't be an error!
+ return TokError("LEB128 not supported yet");
+}
+
+bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection != NULL)
+ getStreamer().SwitchSection(PreviousSection);
+
+ return false;
+}
+
namespace llvm {
MCAsmParserExtension *createELFAsmParser() {
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index bee30641c7fc..70295efc613c 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -12,19 +12,26 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
-MCAsmParser::MCAsmParser() {
+MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
}
MCAsmParser::~MCAsmParser() {
}
+void MCAsmParser::setTargetParser(TargetAsmParser &P) {
+ assert(!TargetParser && "Target parser is already initialized!");
+ TargetParser = &P;
+ TargetParser->Initialize(*this);
+}
+
const AsmToken &MCAsmParser::getTok() {
return getLexer().getTok();
}
-bool MCAsmParser::TokError(const char *Msg) {
+bool MCAsmParser::TokError(const Twine &Msg) {
Error(getLexer().getLoc(), Msg);
return true;
}
@@ -34,8 +41,4 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
return ParseExpression(Res, L);
}
-/// getStartLoc - Get the location of the first token of this operand.
-SMLoc MCParsedAsmOperand::getStartLoc() const { return SMLoc(); }
-SMLoc MCParsedAsmOperand::getEndLoc() const { return SMLoc(); }
-
diff --git a/lib/MC/MCParser/TargetAsmParser.cpp b/lib/MC/MCParser/TargetAsmParser.cpp
index 05760c96cc65..8d43c21f4bc9 100644
--- a/lib/MC/MCParser/TargetAsmParser.cpp
+++ b/lib/MC/MCParser/TargetAsmParser.cpp
@@ -11,7 +11,7 @@
using namespace llvm;
TargetAsmParser::TargetAsmParser(const Target &T)
- : TheTarget(T)
+ : TheTarget(T), AvailableFeatures(0)
{
}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 573f2a3530ee..3e9d02ea5ae7 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -15,7 +15,8 @@
#include <cstdlib>
using namespace llvm;
-MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context), CurSection(0) {
+MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), CurSection(0),
+ PrevSection(0) {
}
MCStreamer::~MCStreamer() {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 7ca09511bdeb..cffabfadb316 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -769,7 +769,7 @@ public:
IsPCRel = 1;
FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) +
Target.getConstant());
- FixedValue += 1 << Log2Size;
+ FixedValue += 1ULL << Log2Size;
} else {
FixedValue = 0;
}
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
index a661fa6f4080..bf8b7c0e7831 100644
--- a/lib/MC/Makefile
+++ b/lib/MC/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../..
LIBRARYNAME = LLVMMC
BUILD_ARCHIVE := 1
-PARALLEL_DIRS := MCParser
+PARALLEL_DIRS := MCParser MCDisassembler
include $(LEVEL)/Makefile.common
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 6804766b2895..eeb2b9675f4b 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -12,41 +12,552 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "WinCOFFObjectWriter"
+
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCSectionCOFF.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "llvm/System/TimeValue.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <cstdio>
+
using namespace llvm;
namespace {
+typedef llvm::SmallString<COFF::NameSize> name;
+
+enum AuxiliaryType {
+ ATFunctionDefinition,
+ ATbfAndefSymbol,
+ ATWeakExternal,
+ ATFile,
+ ATSectionDefinition
+};
+
+struct AuxSymbol {
+ AuxiliaryType AuxType;
+ COFF::Auxiliary Aux;
+};
+
+class COFFSymbol {
+public:
+ COFF::symbol Data;
+
+ typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+
+ name Name;
+ size_t Index;
+ AuxiliarySymbols Aux;
+ COFFSymbol *Other;
+
+ MCSymbolData const *MCData;
+
+ COFFSymbol(llvm::StringRef name, size_t index);
+ size_t size() const;
+ void set_name_offset(uint32_t Offset);
+};
+
+// This class contains staging data for a COFF relocation entry.
+struct COFFRelocation {
+ COFF::relocation Data;
+ COFFSymbol *Symb;
+
+ COFFRelocation() : Symb(NULL) {}
+ static size_t size() { return COFF::RelocationSize; }
+};
+
+typedef std::vector<COFFRelocation> relocations;
+
+class COFFSection {
+public:
+ COFF::section Header;
+
+ std::string Name;
+ size_t Number;
+ MCSectionData const *MCData;
+ COFFSymbol *Symb;
+ relocations Relocations;
+
+ COFFSection(llvm::StringRef name, size_t Index);
+ static size_t size();
+};
+
+// This class holds the COFF string table.
+class StringTable {
+ typedef llvm::StringMap<size_t> map;
+ map Map;
+
+ void update_length();
+public:
+ std::vector<char> Data;
+
+ StringTable();
+ size_t size() const;
+ size_t insert(llvm::StringRef String);
+};
+
+class WinCOFFObjectWriter : public MCObjectWriter {
+public:
+
+ typedef std::vector<COFFSymbol*> symbols;
+ typedef std::vector<COFFSection*> sections;
+
+ typedef StringMap<COFFSymbol *> name_symbol_map;
+ typedef StringMap<COFFSection *> name_section_map;
+
+ typedef DenseMap<MCSymbolData const *, COFFSymbol *> symbol_map;
+ typedef DenseMap<MCSectionData const *, COFFSection *> section_map;
+
+ // Root level file contents.
+ bool Is64Bit;
+ COFF::header Header;
+ sections Sections;
+ symbols Symbols;
+ StringTable Strings;
+
+ // Maps used during object file creation.
+ section_map SectionMap;
+ symbol_map SymbolMap;
+
+ WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+ ~WinCOFFObjectWriter();
+
+ COFFSymbol *createSymbol(llvm::StringRef Name);
+ COFFSection *createSection(llvm::StringRef Name);
+
+ void InitCOFFEntity(COFFSymbol &Symbol);
+ void InitCOFFEntity(COFFSection &Section);
+
+ template <typename object_t, typename list_t>
+ object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
+
+ void DefineSection(MCSectionData const &SectionData);
+ void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
+
+ bool ExportSection(COFFSection *S);
+ bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+
+ // Entity writing methods.
+
+ void WriteFileHeader(const COFF::header &Header);
+ void WriteSymbol(const COFFSymbol *S);
+ void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
+ void WriteSectionHeader(const COFF::section &S);
+ void WriteRelocation(const COFF::relocation &R);
+
+ // MCObjectWriter interface implementation.
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm);
+
+ void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+}
+
+static inline void write_uint32_le(void *Data, uint32_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x000000FF) >> 0;
+ Ptr[1] = (Value & 0x0000FF00) >> 8;
+ Ptr[2] = (Value & 0x00FF0000) >> 16;
+ Ptr[3] = (Value & 0xFF000000) >> 24;
+}
+
+static inline void write_uint16_le(void *Data, uint16_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x00FF) >> 0;
+ Ptr[1] = (Value & 0xFF00) >> 8;
+}
+
+static inline void write_uint8_le(void *Data, uint8_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0xFF) >> 0;
+}
- class WinCOFFObjectWriter : public MCObjectWriter {
- public:
- WinCOFFObjectWriter(raw_ostream &OS);
+//------------------------------------------------------------------------------
+// Symbol class implementation
+
+COFFSymbol::COFFSymbol(llvm::StringRef name, size_t index)
+ : Name(name.begin(), name.end()), Index(-1)
+ , Other(NULL), MCData(NULL) {
+ memset(&Data, 0, sizeof(Data));
+}
+
+size_t COFFSymbol::size() const {
+ return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize);
+}
+
+// In the case that the name does not fit within 8 bytes, the offset
+// into the string table is stored in the last 4 bytes instead, leaving
+// the first 4 bytes as 0.
+void COFFSymbol::set_name_offset(uint32_t Offset) {
+ write_uint32_le(Data.Name + 0, 0);
+ write_uint32_le(Data.Name + 4, Offset);
+}
+
+//------------------------------------------------------------------------------
+// Section class implementation
+
+COFFSection::COFFSection(llvm::StringRef name, size_t Index)
+ : Name(name), Number(Index + 1)
+ , MCData(NULL), Symb(NULL) {
+ memset(&Header, 0, sizeof(Header));
+}
+
+size_t COFFSection::size() {
+ return COFF::SectionSize;
+}
+
+//------------------------------------------------------------------------------
+// StringTable class implementation
+
+/// Write the length of the string table into Data.
+/// The length of the string table includes uint32 length header.
+void StringTable::update_length() {
+ write_uint32_le(&Data.front(), Data.size());
+}
+
+StringTable::StringTable() {
+ // The string table data begins with the length of the entire string table
+ // including the length header. Allocate space for this header.
+ Data.resize(4);
+}
+
+size_t StringTable::size() const {
+ return Data.size();
+}
+
+/// Add String to the table iff it is not already there.
+/// @returns the index into the string table where the string is now located.
+size_t StringTable::insert(llvm::StringRef String) {
+ map::iterator i = Map.find(String);
+
+ if (i != Map.end())
+ return i->second;
+
+ size_t Offset = Data.size();
- // MCObjectWriter interface implementation.
+ // Insert string data into string table.
+ Data.insert(Data.end(), String.begin(), String.end());
+ Data.push_back('\0');
- void ExecutePostLayoutBinding(MCAssembler &Asm);
+ // Put a reference to it in the map.
+ Map[String] = Offset;
- void RecordRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue);
+ // Update the internal length field.
+ update_length();
- void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
- };
+ return Offset;
}
-WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS)
- : MCObjectWriter(OS, true) {
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter class implementation
+
+WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit)
+ : MCObjectWriter(OS, true)
+ , Is64Bit(is64Bit) {
+ memset(&Header, 0, sizeof(Header));
+
+ Is64Bit ? Header.Machine = COFF::IMAGE_FILE_MACHINE_AMD64
+ : Header.Machine = COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+WinCOFFObjectWriter::~WinCOFFObjectWriter() {
+ for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
+ delete *I;
+ for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
+ delete *I;
+}
+
+COFFSymbol *WinCOFFObjectWriter::createSymbol(llvm::StringRef Name) {
+ return createCOFFEntity<COFFSymbol>(Name, Symbols);
+}
+
+COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+ return createCOFFEntity<COFFSection>(Name, Sections);
+}
+
+/// This function initializes a symbol by entering its name into the string
+/// table if it is too long to fit in the symbol table header.
+void WinCOFFObjectWriter::InitCOFFEntity(COFFSymbol &S) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ S.set_name_offset(StringTableEntry);
+ } else
+ memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+}
+
+/// This function initializes a section by entering its name into the string
+/// table if it is too long to fit in the section table header.
+void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ // FIXME: Why is this number 999999? This number is never mentioned in the
+ // spec. I'm assuming this is due to the printed value needing to fit into
+ // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
+ // 6)? The spec does not state if this entry should be null terminated in
+ // this case, and thus this seems to be the best way to do it. I think I
+ // just solved my own FIXME...
+ if (StringTableEntry > 999999)
+ report_fatal_error("COFF string table is greater than 999999 bytes.");
+
+ sprintf(S.Header.Name, "/%d", (unsigned)StringTableEntry);
+ } else
+ memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+}
+
+/// A template used to lookup or create a symbol/section, and initialize it if
+/// needed.
+template <typename object_t, typename list_t>
+object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
+ list_t &List) {
+ object_t *Object = new object_t(Name, List.size());
+
+ InitCOFFEntity(*Object);
+
+ List.push_back(Object);
+
+ return Object;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF section staging object.
+void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+ // FIXME: Not sure how to verify this (at least in a debug build).
+ MCSectionCOFF const &Sec =
+ static_cast<MCSectionCOFF const &>(SectionData.getSection());
+
+ COFFSection *coff_section = createSection(Sec.getSectionName());
+ COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+
+ coff_section->Symb = coff_symbol;
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+ coff_symbol->Data.SectionNumber = coff_section->Number;
+
+ // In this case the auxiliary symbol is a Section Definition.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATSectionDefinition;
+ coff_symbol->Aux[0].Aux.SectionDefinition.Number = coff_section->Number;
+ coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
+
+ coff_section->Header.Characteristics = Sec.getCharacteristics();
+
+ uint32_t &Characteristics = coff_section->Header.Characteristics;
+ switch (SectionData.getAlignment()) {
+ case 1: Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES; break;
+ case 2: Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES; break;
+ case 4: Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES; break;
+ case 8: Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES; break;
+ case 16: Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES; break;
+ case 32: Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES; break;
+ case 64: Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES; break;
+ case 128: Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES; break;
+ case 256: Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES; break;
+ case 512: Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES; break;
+ case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
+ case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
+ case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
+ case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+ default:
+ llvm_unreachable("unsupported section alignment");
+ }
+
+ // Bind internal COFF section to MC section.
+ coff_section->MCData = &SectionData;
+ SectionMap[&SectionData] = coff_section;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF symbol staging object.
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Assembler) {
+ COFFSymbol *coff_symbol = createSymbol(SymbolData.getSymbol().getName());
+
+ coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
+ coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+
+ // If no storage class was specified in the streamer, define it here.
+ if (coff_symbol->Data.StorageClass == 0) {
+ bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+
+ coff_symbol->Data.StorageClass =
+ external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+ }
+
+ if (SymbolData.getFlags() & COFF::SF_WeakReference) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+ const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+ // FIXME: This assert message isn't very good.
+ assert(Value->getKind() == MCExpr::SymbolRef &&
+ "Value must be a SymbolRef!");
+
+ const MCSymbolRefExpr *SymbolRef =
+ static_cast<const MCSymbolRefExpr *>(Value);
+
+ const MCSymbolData &OtherSymbolData =
+ Assembler.getSymbolData(SymbolRef->getSymbol());
+
+ // FIXME: This assert message isn't very good.
+ assert(SymbolMap.find(&OtherSymbolData) != SymbolMap.end() &&
+ "OtherSymbolData must be in the symbol map!");
+
+ coff_symbol->Other = SymbolMap[&OtherSymbolData];
+
+ // Setup the Weak External auxiliary symbol.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATWeakExternal;
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ }
+
+ // Bind internal COFF symbol to MC symbol.
+ coff_symbol->MCData = &SymbolData;
+ SymbolMap[&SymbolData] = coff_symbol;
+}
+
+bool WinCOFFObjectWriter::ExportSection(COFFSection *S) {
+ return (S->Header.Characteristics
+ & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+}
+
+bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Asm) {
+ // This doesn't seem to be right. Strings referred to from the .data section
+ // need symbols so they can be linked to code in the .text section right?
+
+ // return Asm.isSymbolLinkerVisible (&SymbolData);
+
+ // For now, all symbols are exported, the linker will sort it out for us.
+ return true;
+}
+
+//------------------------------------------------------------------------------
+// entity writing methods
+
+void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
+ WriteLE16(Header.Machine);
+ WriteLE16(Header.NumberOfSections);
+ WriteLE32(Header.TimeDateStamp);
+ WriteLE32(Header.PointerToSymbolTable);
+ WriteLE32(Header.NumberOfSymbols);
+ WriteLE16(Header.SizeOfOptionalHeader);
+ WriteLE16(Header.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
+ WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
+ WriteLE32(S->Data.Value);
+ WriteLE16(S->Data.SectionNumber);
+ WriteLE16(S->Data.Type);
+ Write8(S->Data.StorageClass);
+ Write8(S->Data.NumberOfAuxSymbols);
+ WriteAuxiliarySymbols(S->Aux);
+}
+
+void WinCOFFObjectWriter::WriteAuxiliarySymbols(
+ const COFFSymbol::AuxiliarySymbols &S) {
+ for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
+ i != e; ++i) {
+ switch(i->AuxType) {
+ case ATFunctionDefinition:
+ WriteLE32(i->Aux.FunctionDefinition.TagIndex);
+ WriteLE32(i->Aux.FunctionDefinition.TotalSize);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.FunctionDefinition.unused));
+ break;
+ case ATbfAndefSymbol:
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1));
+ WriteLE16(i->Aux.bfAndefSymbol.Linenumber);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2));
+ WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3));
+ break;
+ case ATWeakExternal:
+ WriteLE32(i->Aux.WeakExternal.TagIndex);
+ WriteLE32(i->Aux.WeakExternal.Characteristics);
+ WriteZeros(sizeof(i->Aux.WeakExternal.unused));
+ break;
+ case ATFile:
+ WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName),
+ sizeof(i->Aux.File.FileName)));
+ break;
+ case ATSectionDefinition:
+ WriteLE32(i->Aux.SectionDefinition.Length);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
+ WriteLE32(i->Aux.SectionDefinition.CheckSum);
+ WriteLE16(i->Aux.SectionDefinition.Number);
+ Write8(i->Aux.SectionDefinition.Selection);
+ WriteZeros(sizeof(i->Aux.SectionDefinition.unused));
+ break;
+ }
+ }
+}
+
+void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) {
+ WriteBytes(StringRef(S.Name, COFF::NameSize));
+
+ WriteLE32(S.VirtualSize);
+ WriteLE32(S.VirtualAddress);
+ WriteLE32(S.SizeOfRawData);
+ WriteLE32(S.PointerToRawData);
+ WriteLE32(S.PointerToRelocations);
+ WriteLE32(S.PointerToLineNumbers);
+ WriteLE16(S.NumberOfRelocations);
+ WriteLE16(S.NumberOfLineNumbers);
+ WriteLE32(S.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
+ WriteLE32(R.VirtualAddress);
+ WriteLE32(R.SymbolTableIndex);
+ WriteLE16(R.Type);
}
////////////////////////////////////////////////////////////////////////////////
// MCObjectWriter interface implementations
void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+ // "Define" each section & symbol. This creates section & symbol
+ // entries in the staging area and gives them their final indexes.
+
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
+ DefineSection(*i);
+
+ for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
+ e = Asm.symbol_end(); i != e; i++) {
+ if (ExportSymbol(*i, Asm))
+ DefineSymbol(*i, Asm);
+ }
}
void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
@@ -55,17 +566,209 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
+ assert(Target.getSymA() != NULL && "Relocation must reference a symbol!");
+
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+
+ MCSectionData const *SectionData = Fragment->getParent();
+
+ // Mark this symbol as requiring an entry in the symbol table.
+ assert(SectionMap.find(SectionData) != SectionMap.end() &&
+ "Section must already have been defined in ExecutePostLayoutBinding!");
+ assert(SymbolMap.find(&A_SD) != SymbolMap.end() &&
+ "Symbol must already have been defined in ExecutePostLayoutBinding!");
+
+ COFFSection *coff_section = SectionMap[SectionData];
+ COFFSymbol *coff_symbol = SymbolMap[&A_SD];
+
+ if (Target.getSymB()) {
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+
+ FixedValue = Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(&B_SD);
+
+ // In the case where we have SymbA and SymB, we just need to store the delta
+ // between the two symbols. Update FixedValue to account for the delta, and
+ // skip recording the relocation.
+ return;
+ } else {
+ FixedValue = Target.getConstant();
+ }
+
+ COFFRelocation Reloc;
+
+ Reloc.Data.SymbolTableIndex = 0;
+ Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
+ Reloc.Symb = coff_symbol;
+
+ Reloc.Data.VirtualAddress += Fixup.getOffset();
+
+ switch (Fixup.getKind()) {
+ case X86::reloc_pcrel_4byte:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
+ : COFF::IMAGE_REL_I386_REL32;
+ // FIXME: Can anyone explain what this does other than adjust for the size
+ // of the offset?
+ FixedValue += 4;
+ break;
+ case FK_Data_4:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
+ : COFF::IMAGE_REL_I386_DIR32;
+ break;
+ case FK_Data_8:
+ if (Is64Bit)
+ Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64;
+ else
+ llvm_unreachable("unsupported relocation type");
+ break;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+
+ coff_section->Relocations.push_back(Reloc);
}
void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ // Assign symbol and section indexes and offsets.
+
+ Header.NumberOfSymbols = 0;
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ MCSymbolData const *SymbolData = coff_symbol->MCData;
+
+ coff_symbol->Index = Header.NumberOfSymbols++;
+
+ // Update section number & offset for symbols that have them.
+ if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
+ COFFSection *coff_section = SectionMap[SymbolData->Fragment->getParent()];
+
+ coff_symbol->Data.SectionNumber = coff_section->Number;
+ coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
+ + SymbolData->Offset;
+ }
+
+ // Update auxiliary symbol info.
+ coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+ Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ }
+
+ // Fixup weak external references.
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *symb = *i;
+
+ if (symb->Other != NULL) {
+ assert(symb->Aux.size() == 1 &&
+ "Symbol must contain one aux symbol!");
+ assert(symb->Aux[0].AuxType == ATWeakExternal &&
+ "Symbol's aux symbol must be a Weak External!");
+ symb->Aux[0].Aux.WeakExternal.TagIndex = symb->Other->Index;
+ }
+ }
+
+ // Assign file offsets to COFF object file structures.
+
+ unsigned offset = 0;
+
+ offset += COFF::HeaderSize;
+ offset += COFF::SectionSize * Asm.size();
+
+ Header.NumberOfSections = Sections.size();
+
+ for (MCAssembler::const_iterator i = Asm.begin(),
+ e = Asm.end();
+ i != e; i++) {
+ COFFSection *Sec = SectionMap[i];
+
+ Sec->Header.SizeOfRawData = Layout.getSectionFileSize(i);
+
+ if (ExportSection(Sec)) {
+ Sec->Header.PointerToRawData = offset;
+
+ offset += Sec->Header.SizeOfRawData;
+ }
+
+ if (Sec->Relocations.size() > 0) {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ Sec->Header.PointerToRelocations = offset;
+
+ offset += COFF::RelocationSize * Sec->Relocations.size();
+
+ for (relocations::iterator cr = Sec->Relocations.begin(),
+ er = Sec->Relocations.end();
+ cr != er; cr++) {
+ (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
+ }
+ }
+
+ assert(Sec->Symb->Aux.size() == 1 && "Section's symbol must have one aux!");
+ AuxSymbol &Aux = Sec->Symb->Aux[0];
+ assert(Aux.AuxType == ATSectionDefinition &&
+ "Section's symbol's aux symbol must be a Section Definition!");
+ Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
+ Aux.Aux.SectionDefinition.NumberOfRelocations =
+ Sec->Header.NumberOfRelocations;
+ Aux.Aux.SectionDefinition.NumberOfLinenumbers =
+ Sec->Header.NumberOfLineNumbers;
+ }
+
+ Header.PointerToSymbolTable = offset;
+
+ Header.TimeDateStamp = sys::TimeValue::now().toEpochTime();
+
+ // Write it all to disk...
+ WriteFileHeader(Header);
+
+ {
+ sections::iterator i, ie;
+ MCAssembler::const_iterator j, je;
+
+ for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
+ WriteSectionHeader((*i)->Header);
+
+ for (i = Sections.begin(), ie = Sections.end(),
+ j = Asm.begin(), je = Asm.end();
+ (i != ie) && (j != je); i++, j++) {
+ if ((*i)->Header.PointerToRawData != 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRawData &&
+ "Section::PointerToRawData is insane!");
+
+ Asm.WriteSectionData(j, Layout, this);
+ }
+
+ if ((*i)->Relocations.size() > 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRelocations &&
+ "Section::PointerToRelocations is insane!");
+
+ for (relocations::const_iterator k = (*i)->Relocations.begin(),
+ ke = (*i)->Relocations.end();
+ k != ke; k++) {
+ WriteRelocation(k->Data);
+ }
+ } else
+ assert((*i)->Header.PointerToRelocations == 0 &&
+ "Section::PointerToRelocations is insane!");
+ }
+ }
+
+ assert(OS.tell() == Header.PointerToSymbolTable &&
+ "Header::PointerToSymbolTable is insane!");
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
+ WriteSymbol(*i);
+
+ OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
}
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
namespace llvm {
- MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS) {
- return new WinCOFFObjectWriter(OS);
+ MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) {
+ return new WinCOFFObjectWriter(OS, is64Bit);
}
}
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 1030cdb28d2c..8a194bff2151 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -18,27 +18,34 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringMap.h"
+
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define dbg_notimpl(x) \
- do { dbgs() << "not implemented, " << __FUNCTION__ << " (" << x << ")"; \
- abort(); } while (false);
-
namespace {
class WinCOFFStreamer : public MCObjectStreamer {
public:
+ MCSymbol const *CurSymbol;
+
WinCOFFStreamer(MCContext &Context,
TargetAsmBackend &TAB,
MCCodeEmitter &CE,
raw_ostream &OS);
+ void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External);
+
// MCStreamer interface
virtual void EmitLabel(MCSymbol *Symbol);
@@ -52,18 +59,18 @@ public:
virtual void EndCOFFSymbolDef();
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
+ unsigned ByteAlignment);
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- unsigned Size,unsigned ByteAlignment);
+ unsigned Size,unsigned ByteAlignment);
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment);
+ uint64_t Size, unsigned ByteAlignment);
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValue(const MCExpr *Value, unsigned Size,
+ virtual void EmitValue(const MCExpr *Value, unsigned Size,
unsigned AddrSpace);
virtual void EmitGPRel32Value(const MCExpr *Value);
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
- unsigned ValueSize, unsigned MaxBytesToEmit);
+ unsigned ValueSize, unsigned MaxBytesToEmit);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit);
virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
@@ -78,96 +85,224 @@ WinCOFFStreamer::WinCOFFStreamer(MCContext &Context,
TargetAsmBackend &TAB,
MCCodeEmitter &CE,
raw_ostream &OS)
- : MCObjectStreamer(Context, TAB, OS, &CE) {
+ : MCObjectStreamer(Context, TAB, OS, &CE)
+ , CurSymbol(NULL) {
+}
+
+void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External) {
+ assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+
+ std::string SectionName(".bss$linkonce");
+ SectionName.append(Symbol->getName().begin(), Symbol->getName().end());
+
+ MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ unsigned Characteristics =
+ COFF::IMAGE_SCN_LNK_COMDAT |
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
+
+ const MCSection *Section = MCStreamer::getContext().getCOFFSection(
+ SectionName, Characteristics, Selection, SectionKind::getBSS());
+
+ MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
+
+ if (SectionData.getAlignment() < ByteAlignment)
+ SectionData.setAlignment(ByteAlignment);
+
+ SymbolData.setExternal(External);
+
+ Symbol->setSection(*Section);
+
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData);
+
+ SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData));
}
// MCStreamer interface
void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
+ // TODO: This is copied almost exactly from the MachOStreamer. Consider
+ // merging into MCObjectStreamer?
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(CurSection && "Cannot emit before setting section!");
+
+ Symbol->setSection(*CurSection);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(DF);
+ SD.setOffset(DF->getContents().size());
}
void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
- dbg_notimpl("Flag = " << Flag);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as MachOStreamer. Consider merging into
+ // MCObjectStreamer.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ AddValueSymbols(Value);
+ Symbol->setVariableValue(Value);
}
void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
+ switch (Attribute) {
+ case MCSA_WeakReference:
+ getAssembler().getOrCreateSymbolData(*Symbol).modifyFlags(
+ COFF::SF_WeakReference,
+ COFF::SF_WeakReference);
+ break;
+
+ case MCSA_Global:
+ getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true);
+ break;
+
+ default:
+ llvm_unreachable("unsupported attribute");
+ break;
+ }
}
void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- dbg_notimpl("Symbol = " << Symbol->getName() << ", DescValue = "<< DescValue);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+ assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
+ "to BeginCOFFSymbolDef!");
+ CurSymbol = Symbol;
}
void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in "
+ "the first byte!");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ StorageClass << COFF::SF_ClassShift,
+ COFF::SF_ClassMask);
}
void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 "
+ "bytes");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ Type << COFF::SF_TypeShift,
+ COFF::SF_TypeMask);
}
void WinCOFFStreamer::EndCOFFSymbolDef() {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ CurSymbol = NULL;
}
void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- dbg_notimpl("Symbol = " << Symbol->getName() << ", Value = " << *Value);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
+ unsigned ByteAlignment) {
+ AddCommonSymbol(Symbol, Size, ByteAlignment, true);
}
void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ AddCommonSymbol(Symbol, Size, 1, false);
}
void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- unsigned Size,unsigned ByteAlignment) {
- MCSectionCOFF const *SectionCOFF =
- static_cast<MCSectionCOFF const *>(Section);
-
- dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " <<
- Symbol->getName() << ", Size = " << Size << ", ByteAlignment = "
- << ByteAlignment);
+ unsigned Size,unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
- MCSectionCOFF const *SectionCOFF =
- static_cast<MCSectionCOFF const *>(Section);
-
- dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " <<
- Symbol->getName() << ", Size = " << Size << ", ByteAlignment = "
- << ByteAlignment);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
+ unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
+ // FIXME: Endianness assumption.
+ for (unsigned i = 0; i != Size; ++i)
+ DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
+ } else {
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ AddValueSymbols(Value),
+ MCFixup::getKindForSize(Size)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+ }
}
void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) {
- dbg_notimpl("Value = '" << *Value);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
- int64_t Value,
- unsigned ValueSize,
- unsigned MaxBytesToEmit) {
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
}
void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0) {
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
}
void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) {
- dbg_notimpl("Offset = '" << *Offset << "', Value = " << Value);
+ unsigned char Value) {
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
@@ -176,11 +311,24 @@ void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
}
void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo,
- StringRef Filename) {
- dbg_notimpl("FileNo = " << FileNo << ", Filename = '" << Filename << "'");
+ StringRef Filename) {
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
+ for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
+ if (Instruction.getOperand(i).isExpr())
+ AddValueSymbols(Instruction.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ MCInstFragment *Fragment =
+ new MCInstFragment(Instruction, getCurrentSectionData());
+
+ raw_svector_ostream VecOS(Fragment->getCode());
+
+ getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS,
+ Fragment->getFixups());
}
void WinCOFFStreamer::Finish() {
@@ -192,7 +340,10 @@ namespace llvm
MCStreamer *createWinCOFFStreamer(MCContext &Context,
TargetAsmBackend &TAB,
MCCodeEmitter &CE,
- raw_ostream &OS) {
- return new WinCOFFStreamer(Context, TAB, CE, OS);
+ raw_ostream &OS,
+ bool RelaxAll) {
+ WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS);
+ S->getAssembler().setRelaxAll(RelaxAll);
+ return S;
}
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 2e7855701133..b87ddf9c95b5 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -153,6 +153,7 @@ readExponent(StringRef::iterator begin, StringRef::iterator end)
value += absExponent * 10;
if (absExponent >= overlargeExponent) {
absExponent = overlargeExponent;
+ p = end; /* outwit assert below */
break;
}
absExponent = value;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 262fa42ab2ce..8a212a291f24 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2123,15 +2123,16 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
char *BufPtr = Buffer+65;
uint64_t N;
- if (Signed) {
+ if (!Signed) {
+ N = getZExtValue();
+ } else {
int64_t I = getSExtValue();
- if (I < 0) {
+ if (I >= 0) {
+ N = I;
+ } else {
Str.push_back('-');
- I = -I;
+ N = -(uint64_t)I;
}
- N = I;
- } else {
- N = getZExtValue();
}
while (N) {
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 366d2f799211..0c70a402654e 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMSupport
circular_raw_ostream.cpp
CommandLine.cpp
ConstantRange.cpp
+ CrashRecoveryContext.cpp
Debug.cpp
DeltaAlgorithm.cpp
DAGDeltaAlgorithm.cpp
@@ -23,7 +24,6 @@ add_llvm_library(LLVMSupport
PluginLoader.cpp
PrettyStackTrace.cpp
Regex.cpp
- SlowOperationInformer.cpp
SmallPtrSet.cpp
SmallVector.cpp
SourceMgr.cpp
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 2746f7aaaa5e..8ef3785f5331 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -21,6 +21,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Constants.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,7 +39,7 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
/// Initialize a range to hold the single specified value.
///
-ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) {}
+ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {}
ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
Lower(L), Upper(U) {
@@ -202,14 +203,12 @@ bool ConstantRange::contains(const APInt &V) const {
}
/// contains - Return true if the argument is a subset of this range.
-/// Two equal set contain each other. The empty set is considered to be
-/// contained by all other sets.
+/// Two equal sets contain each other. The empty set contained by all other
+/// sets.
///
bool ConstantRange::contains(const ConstantRange &Other) const {
- if (isFullSet()) return true;
- if (Other.isFullSet()) return false;
- if (Other.isEmptySet()) return true;
- if (isEmptySet()) return false;
+ if (isFullSet() || Other.isEmptySet()) return true;
+ if (isEmptySet() || Other.isFullSet()) return false;
if (!isWrappedSet()) {
if (Other.isWrappedSet())
@@ -235,46 +234,6 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const {
return ConstantRange(Lower - Val, Upper - Val);
}
-
-// intersect1Wrapped - This helper function is used to intersect two ranges when
-// it is known that LHS is wrapped and RHS isn't.
-//
-ConstantRange
-ConstantRange::intersect1Wrapped(const ConstantRange &LHS,
- const ConstantRange &RHS) {
- assert(LHS.isWrappedSet() && !RHS.isWrappedSet());
-
- // Check to see if we overlap on the Left side of RHS...
- //
- if (RHS.Lower.ult(LHS.Upper)) {
- // We do overlap on the left side of RHS, see if we overlap on the right of
- // RHS...
- if (RHS.Upper.ugt(LHS.Lower)) {
- // Ok, the result overlaps on both the left and right sides. See if the
- // resultant interval will be smaller if we wrap or not...
- //
- if (LHS.getSetSize().ult(RHS.getSetSize()))
- return LHS;
- else
- return RHS;
-
- } else {
- // No overlap on the right, just on the left.
- return ConstantRange(RHS.Lower, LHS.Upper);
- }
- } else {
- // We don't overlap on the left side of RHS, see if we overlap on the right
- // of RHS...
- if (RHS.Upper.ugt(LHS.Lower)) {
- // Simple overlap...
- return ConstantRange(LHS.Lower, RHS.Upper);
- } else {
- // No overlap...
- return ConstantRange(LHS.getBitWidth(), false);
- }
- }
-}
-
/// intersectWith - Return the range that results from the intersection of this
/// range with another range. The resultant range is guaranteed to include all
/// elements contained in both input ranges, and to have the smallest possible
@@ -486,7 +445,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
assert(SrcTySize > DstTySize && "Not a value truncation");
APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize));
if (isFullSet() || getSetSize().ugt(Size))
- return ConstantRange(DstTySize);
+ return ConstantRange(DstTySize, /*isFullSet=*/true);
APInt L = Lower; L.trunc(DstTySize);
APInt U = Upper; U.trunc(DstTySize);
@@ -539,6 +498,27 @@ ConstantRange::add(const ConstantRange &Other) const {
}
ConstantRange
+ConstantRange::sub(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() - Other.getLower();
+ APInt NewUpper = getUpper() - Other.getUpper() + 1;
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
+
+ConstantRange
ConstantRange::multiply(const ConstantRange &Other) const {
// TODO: If either operand is a single element and the multiply is known to
// be non-wrapping, round the result min and max value to the appropriate
@@ -616,40 +596,42 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
}
ConstantRange
-ConstantRange::shl(const ConstantRange &Amount) const {
- if (isEmptySet())
- return *this;
+ConstantRange::shl(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
- APInt min = getUnsignedMin() << Amount.getUnsignedMin();
- APInt max = getUnsignedMax() << Amount.getUnsignedMax();
+ APInt min = getUnsignedMin().shl(Other.getUnsignedMin());
+ APInt max = getUnsignedMax().shl(Other.getUnsignedMax());
// there's no overflow!
APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
- if (Zeros.uge(Amount.getUnsignedMax()))
- return ConstantRange(min, max);
+ if (Zeros.ugt(Other.getUnsignedMax()))
+ return ConstantRange(min, max + 1);
// FIXME: implement the other tricky cases
- return ConstantRange(getBitWidth());
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
}
ConstantRange
-ConstantRange::ashr(const ConstantRange &Amount) const {
- if (isEmptySet())
- return *this;
+ConstantRange::lshr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt max = getUnsignedMax().lshr(Other.getUnsignedMin());
+ APInt min = getUnsignedMin().lshr(Other.getUnsignedMax());
+ if (min == max + 1)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- APInt min = getUnsignedMax().ashr(Amount.getUnsignedMin());
- APInt max = getUnsignedMin().ashr(Amount.getUnsignedMax());
- return ConstantRange(min, max);
+ return ConstantRange(min, max + 1);
}
-ConstantRange
-ConstantRange::lshr(const ConstantRange &Amount) const {
- if (isEmptySet())
- return *this;
-
- APInt min = getUnsignedMax().lshr(Amount.getUnsignedMin());
- APInt max = getUnsignedMin().lshr(Amount.getUnsignedMax());
- return ConstantRange(min, max);
+ConstantRange ConstantRange::inverse() const {
+ if (isFullSet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ } else if (isEmptySet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ }
+ return ConstantRange(Upper, Lower);
}
/// print - Print out the bounds to a stream...
@@ -668,5 +650,3 @@ void ConstantRange::print(raw_ostream &OS) const {
void ConstantRange::dump() const {
print(dbgs());
}
-
-
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
new file mode 100644
index 000000000000..49258ede83c1
--- /dev/null
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -0,0 +1,204 @@
+//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/ThreadLocal.h"
+#include <setjmp.h>
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+
+struct CrashRecoveryContextImpl;
+
+static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
+
+struct CrashRecoveryContextImpl {
+ CrashRecoveryContext *CRC;
+ std::string Backtrace;
+ ::jmp_buf JumpBuffer;
+ volatile unsigned Failed : 1;
+
+public:
+ CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
+ Failed(false) {
+ CurrentContext.set(this);
+ }
+ ~CrashRecoveryContextImpl() {
+ CurrentContext.erase();
+ }
+
+ void HandleCrash() {
+ // Eliminate the current context entry, to avoid re-entering in case the
+ // cleanup code crashes.
+ CurrentContext.erase();
+
+ assert(!Failed && "Crash recovery context already failed!");
+ Failed = true;
+
+ // FIXME: Stash the backtrace.
+
+ // Jump back to the RunSafely we were called under.
+ longjmp(JumpBuffer, 1);
+ }
+};
+
+}
+
+static sys::Mutex gCrashRecoveryContexMutex;
+static bool gCrashRecoveryEnabled = false;
+
+CrashRecoveryContext::~CrashRecoveryContext() {
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ delete CRCI;
+}
+
+CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ if (!CRCI)
+ return 0;
+
+ return CRCI->CRC;
+}
+
+#ifdef LLVM_ON_WIN32
+
+// FIXME: No real Win32 implementation currently.
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+}
+
+#else
+
+// Generic POSIX implementation.
+//
+// This implementation relies on synchronous signals being delivered to the
+// current thread. We use a thread local object to keep track of the active
+// crash recovery context, and install signal handlers to invoke HandleCrash on
+// the active object.
+//
+// This implementation does not to attempt to chain signal handlers in any
+// reliable fashion -- if we get a signal outside of a crash recovery context we
+// simply disable crash recovery and raise the signal again.
+
+#include <signal.h>
+
+static int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static struct sigaction PrevActions[NumSignals];
+
+static void CrashRecoverySignalHandler(int Signal) {
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+ if (!CRCI) {
+ // We didn't find a crash recovery context -- this means either we got a
+ // signal on a thread we didn't expect it on, the application got a signal
+ // outside of a crash recovery context, or something else went horribly
+ // wrong.
+ //
+ // Disable crash recovery and raise the signal again. The assumption here is
+ // that the enclosing application will terminate soon, and we won't want to
+ // attempt crash recovery again.
+ //
+ // This call of Disable isn't thread safe, but it doesn't actually matter.
+ CrashRecoveryContext::Disable();
+ raise(Signal);
+ }
+
+ // Unblock the signal we received.
+ sigset_t SigMask;
+ sigemptyset(&SigMask);
+ sigaddset(&SigMask, Signal);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ if (CRCI)
+ const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+}
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+
+ // Setup the signal handler.
+ struct sigaction Handler;
+ Handler.sa_handler = CrashRecoverySignalHandler;
+ Handler.sa_flags = 0;
+ sigemptyset(&Handler.sa_mask);
+
+ for (unsigned i = 0; i != NumSignals; ++i) {
+ sigaction(Signals[i], &Handler, &PrevActions[i]);
+ }
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+
+ // Restore the previous signal handlers.
+ for (unsigned i = 0; i != NumSignals; ++i)
+ sigaction(Signals[i], &PrevActions[i], 0);
+}
+
+#endif
+
+bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+ // If crash recovery is disabled, do nothing.
+ if (gCrashRecoveryEnabled) {
+ assert(!Impl && "Crash recovery context already initialized!");
+ CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
+ Impl = CRCI;
+
+ if (setjmp(CRCI->JumpBuffer) != 0) {
+ return false;
+ }
+ }
+
+ Fn(UserData);
+ return true;
+}
+
+void CrashRecoveryContext::HandleCrash() {
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ assert(CRCI && "Crash recovery context never initialized!");
+ CRCI->HandleCrash();
+}
+
+const std::string &CrashRecoveryContext::getBacktrace() const {
+ CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *) Impl;
+ assert(CRC && "Crash recovery context never initialized!");
+ assert(CRC->Failed && "No crash was detected!");
+ return CRC->Backtrace;
+}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 7e7ca9debe9a..0b7af3e5905b 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -18,8 +18,19 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include "llvm/System/Threading.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
#include <cassert>
#include <cstdlib>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(_MSC_VER)
+# include <io.h>
+# include <fcntl.h>
+#endif
+
using namespace llvm;
using namespace std;
@@ -39,19 +50,26 @@ void llvm::remove_fatal_error_handler() {
ErrorHandler = 0;
}
-void llvm::report_fatal_error(const char *reason) {
- report_fatal_error(Twine(reason));
+void llvm::report_fatal_error(const char *Reason) {
+ report_fatal_error(Twine(Reason));
}
-void llvm::report_fatal_error(const std::string &reason) {
- report_fatal_error(Twine(reason));
+void llvm::report_fatal_error(const std::string &Reason) {
+ report_fatal_error(Twine(Reason));
}
-void llvm::report_fatal_error(const Twine &reason) {
- if (!ErrorHandler) {
- errs() << "LLVM ERROR: " << reason << "\n";
+void llvm::report_fatal_error(const Twine &Reason) {
+ if (ErrorHandler) {
+ ErrorHandler(ErrorHandlerUserData, Reason.str());
} else {
- ErrorHandler(ErrorHandlerUserData, reason.str());
+ // Blast the result out to stderr. We don't try hard to make sure this
+ // succeeds (e.g. handling EINTR) and we can't use errs() here because
+ // raw ostreams can call report_fatal_error.
+ SmallVector<char, 64> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << "LLVM ERROR: " << Reason << "\n";
+ StringRef MessageStr = OS.str();
+ (void)::write(2, MessageStr.data(), MessageStr.size());
}
// If we reached here, we are failing ungracefully. Run the interrupt handlers
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index b8dca334da49..29b595220887 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -23,6 +23,37 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// FoldingSetNodeIDRef Implementation
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
+/// used to lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeIDRef::ComputeHash() const {
+ // This is adapted from SuperFastHash by Paul Hsieh.
+ unsigned Hash = static_cast<unsigned>(Size);
+ for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
+ unsigned Data = *BP;
+ Hash += Data & 0xFFFF;
+ unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
+ Hash = (Hash << 16) ^ Tmp;
+ Hash += Hash >> 11;
+ }
+
+ // Force "avalanching" of final 127 bits.
+ Hash ^= Hash << 3;
+ Hash += Hash >> 5;
+ Hash ^= Hash << 4;
+ Hash += Hash >> 17;
+ Hash ^= Hash << 25;
+ Hash += Hash >> 6;
+ return Hash;
+}
+
+bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
+ if (Size != RHS.Size) return false;
+ return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
+}
+
+//===----------------------------------------------------------------------===//
// FoldingSetNodeID Implementation
/// Add* - Add various data types to Bit data.
@@ -104,31 +135,19 @@ void FoldingSetNodeID::AddString(StringRef String) {
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
/// lookup the node in the FoldingSetImpl.
unsigned FoldingSetNodeID::ComputeHash() const {
- // This is adapted from SuperFastHash by Paul Hsieh.
- unsigned Hash = static_cast<unsigned>(Bits.size());
- for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) {
- unsigned Data = *BP;
- Hash += Data & 0xFFFF;
- unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
- Hash = (Hash << 16) ^ Tmp;
- Hash += Hash >> 11;
- }
-
- // Force "avalanching" of final 127 bits.
- Hash ^= Hash << 3;
- Hash += Hash >> 5;
- Hash ^= Hash << 4;
- Hash += Hash >> 17;
- Hash ^= Hash << 25;
- Hash += Hash >> 6;
- return Hash;
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
}
/// operator== - Used to compare two nodes to each other.
///
bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
- if (Bits.size() != RHS.Bits.size()) return false;
- return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0;
+ return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
}
/// Intern - Copy this node's data to a memory region allocated from the
@@ -168,10 +187,9 @@ static void **GetBucketPtr(void *NextInBucketPtr) {
/// GetBucketFor - Hash the specified node ID and return the hash bucket for
/// the specified ID.
-static void **GetBucketFor(const FoldingSetNodeID &ID,
- void **Buckets, unsigned NumBuckets) {
+static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
// NumBuckets is always a power of 2.
- unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1);
+ unsigned BucketNum = Hash & (NumBuckets-1);
return Buckets + BucketNum;
}
@@ -219,7 +237,7 @@ void FoldingSetImpl::GrowHashTable() {
NumNodes = 0;
// Walk the old buckets, rehashing nodes into their new place.
- FoldingSetNodeID ID;
+ FoldingSetNodeID TempID;
for (unsigned i = 0; i != OldNumBuckets; ++i) {
void *Probe = OldBuckets[i];
if (!Probe) continue;
@@ -229,9 +247,10 @@ void FoldingSetImpl::GrowHashTable() {
NodeInBucket->SetNextInBucket(0);
// Insert the node into the new bucket, after recomputing the hash.
- GetNodeProfile(ID, NodeInBucket);
- InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets));
- ID.clear();
+ InsertNode(NodeInBucket,
+ GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
+ Buckets, NumBuckets));
+ TempID.clear();
}
}
@@ -245,19 +264,18 @@ FoldingSetImpl::Node
*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
- void **Bucket = GetBucketFor(ID, Buckets, NumBuckets);
+ void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
void *Probe = *Bucket;
InsertPos = 0;
- FoldingSetNodeID OtherID;
+ FoldingSetNodeID TempID;
while (Node *NodeInBucket = GetNextPtr(Probe)) {
- GetNodeProfile(OtherID, NodeInBucket);
- if (OtherID == ID)
+ if (NodeEquals(NodeInBucket, ID, TempID))
return NodeInBucket;
+ TempID.clear();
Probe = NodeInBucket->getNextInBucket();
- OtherID.clear();
}
// Didn't find the node, return null with the bucket as the InsertPos.
@@ -273,9 +291,8 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
// Do we need to grow the hashtable?
if (NumNodes+1 > NumBuckets*2) {
GrowHashTable();
- FoldingSetNodeID ID;
- GetNodeProfile(ID, N);
- InsertPos = GetBucketFor(ID, Buckets, NumBuckets);
+ FoldingSetNodeID TempID;
+ InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
}
++NumNodes;
@@ -341,7 +358,7 @@ bool FoldingSetImpl::RemoveNode(Node *N) {
/// instead.
FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
FoldingSetNodeID ID;
- GetNodeProfile(ID, N);
+ GetNodeProfile(N, ID);
void *IP;
if (Node *E = FindNodeOrInsertPos(ID, IP))
return E;
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index a99ab2f30df0..3c8a10849d14 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -72,7 +72,7 @@ asm(".desc ___crashreporter_info__, 0x10");
/// CrashHandler - This callback is run if a fatal signal is delivered to the
/// process, it prints the pretty stack trace.
-static void CrashHandler(void *Cookie) {
+static void CrashHandler(void *) {
#ifndef __APPLE__
// On non-apple systems, just emit the crash stack trace to stderr.
PrintCurStackTrace(errs());
@@ -89,7 +89,8 @@ static void CrashHandler(void *Cookie) {
#ifndef HAVE_CRASHREPORTERCLIENT_H
__crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
#else
- CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+ // Cast to void to avoid warning.
+ (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
#endif
errs() << TmpStr.str();
}
diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp
deleted file mode 100644
index b4e9430e5fdf..000000000000
--- a/lib/Support/SlowOperationInformer.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- SlowOperationInformer.cpp - Keep the user informed ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SlowOperationInformer class for the LLVM debugger.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/SlowOperationInformer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Alarm.h"
-#include <sstream>
-#include <cassert>
-using namespace llvm;
-
-SlowOperationInformer::SlowOperationInformer(const std::string &Name)
- : OperationName(Name), LastPrintAmount(0) {
- sys::SetupAlarm(1);
-}
-
-SlowOperationInformer::~SlowOperationInformer() {
- sys::TerminateAlarm();
- if (LastPrintAmount) {
- // If we have printed something, make _sure_ we print the 100% amount, and
- // also print a newline.
- outs() << std::string(LastPrintAmount, '\b') << "Progress "
- << OperationName << ": 100% \n";
- }
-}
-
-/// progress - Clients should periodically call this method when they are in
-/// an exception-safe state. The Amount variable should indicate how far
-/// along the operation is, given in 1/10ths of a percent (in other words,
-/// Amount should range from 0 to 1000).
-bool SlowOperationInformer::progress(unsigned Amount) {
- int status = sys::AlarmStatus();
- if (status == -1) {
- outs() << "\n";
- LastPrintAmount = 0;
- return true;
- }
-
- // If we haven't spent enough time in this operation to warrant displaying the
- // progress bar, don't do so yet.
- if (status == 0)
- return false;
-
- // Delete whatever we printed last time.
- std::string ToPrint = std::string(LastPrintAmount, '\b');
-
- std::ostringstream OS;
- OS << "Progress " << OperationName << ": " << Amount/10;
- if (unsigned Rem = Amount % 10)
- OS << "." << Rem << "%";
- else
- OS << "% ";
-
- LastPrintAmount = OS.str().size();
- outs() << ToPrint+OS.str();
- outs().flush();
- return false;
-}
diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp
index 2e17af864155..a89f14957635 100644
--- a/lib/Support/SmallVector.cpp
+++ b/lib/Support/SmallVector.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
/// on POD-like datatypes and is out of line to reduce code duplication.
void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) {
size_t CurSizeBytes = size_in_bytes();
- size_t NewCapacityInBytes = 2 * capacity_in_bytes();
+ size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
if (NewCapacityInBytes < MinSizeInBytes)
NewCapacityInBytes = MinSizeInBytes;
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 7d5f65af2842..e32ab74a2d4c 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -44,7 +44,7 @@ Enabled("stats", cl::desc("Enable statistics output from program"));
namespace {
/// StatisticInfo - This class is used in a ManagedStatic so that it is created
-/// on demand (when the first statistic is bumped) and destroyed only when
+/// on demand (when the first statistic is bumped) and destroyed only when
/// llvm_shutdown is called. We print statistics from the destructor.
class StatisticInfo {
std::vector<const Statistic*> Stats;
@@ -52,7 +52,7 @@ class StatisticInfo {
friend void llvm::PrintStatistics(raw_ostream &OS);
public:
~StatisticInfo();
-
+
void addStatistic(const Statistic *S) {
Stats.push_back(S);
}
@@ -71,7 +71,7 @@ void Statistic::RegisterStatistic() {
if (!Initialized) {
if (Enabled)
StatInfo->addStatistic(this);
-
+
sys::MemoryFence();
// Remember we have been registered.
Initialized = true;
@@ -84,7 +84,7 @@ struct NameCompare {
bool operator()(const Statistic *LHS, const Statistic *RHS) const {
int Cmp = std::strcmp(LHS->getName(), RHS->getName());
if (Cmp != 0) return Cmp < 0;
-
+
// Secondary key is the description.
return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
}
@@ -112,7 +112,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
MaxNameLen = std::max(MaxNameLen,
(unsigned)std::strlen(Stats.Stats[i]->getName()));
}
-
+
// Sort the fields by name.
std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare());
@@ -120,7 +120,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
OS << "===" << std::string(73, '-') << "===\n"
<< " ... Statistics Collected ...\n"
<< "===" << std::string(73, '-') << "===\n\n";
-
+
// Print all of the statistics.
for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
std::string CountStr = utostr(Stats.Stats[i]->getValue());
@@ -129,7 +129,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
<< std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
<< " - " << Stats.Stats[i]->getDesc() << "\n";
}
-
+
OS << '\n'; // Flush the output stream.
OS.flush();
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index ca0f518a88b6..46f26b242aac 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -9,6 +9,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
+#include <bitset>
using namespace llvm;
@@ -30,14 +31,14 @@ static bool ascii_isdigit(char x) {
/// compare_lower - Compare strings, ignoring case.
int StringRef::compare_lower(StringRef RHS) const {
for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
- char LHC = ascii_tolower(Data[I]);
- char RHC = ascii_tolower(RHS.Data[I]);
+ unsigned char LHC = ascii_tolower(Data[I]);
+ unsigned char RHC = ascii_tolower(RHS.Data[I]);
if (LHC != RHC)
return LHC < RHC ? -1 : 1;
}
if (Length == RHS.Length)
- return 0;
+ return 0;
return Length < RHS.Length ? -1 : 1;
}
@@ -58,10 +59,10 @@ int StringRef::compare_numeric(StringRef RHS) const {
break;
}
}
- return Data[I] < RHS.Data[I] ? -1 : 1;
+ return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
}
if (Length == RHS.Length)
- return 0;
+ return 0;
return Length < RHS.Length ? -1 : 1;
}
@@ -153,11 +154,15 @@ size_t StringRef::rfind(StringRef Str) const {
/// find_first_of - Find the first character in the string that is in \arg
/// Chars, or npos if not found.
///
-/// Note: O(size() * Chars.size())
+/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_of(StringRef Chars,
size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
for (size_type i = min(From, Length), e = Length; i != e; ++i)
- if (Chars.find(Data[i]) != npos)
+ if (CharBits.test((unsigned char)Data[i]))
return i;
return npos;
}
@@ -174,11 +179,15 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
/// find_first_not_of - Find the first character in the string that is not
/// in the string \arg Chars, or npos if not found.
///
-/// Note: O(size() * Chars.size())
+/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
for (size_type i = min(From, Length), e = Length; i != e; ++i)
- if (Chars.find(Data[i]) == npos)
+ if (!CharBits.test((unsigned char)Data[i]))
return i;
return npos;
}
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
index 299032f18715..c8b260c2e3dd 100644
--- a/lib/Support/SystemUtils.cpp
+++ b/lib/Support/SystemUtils.cpp
@@ -49,6 +49,16 @@ sys::Path llvm::FindExecutable(const std::string &ExeName,
Result.appendComponent(ExeName);
if (Result.canExecute())
return Result;
+ // If the path is absolute (and it usually is), call FindProgramByName to
+ // allow it to try platform-specific logic, such as appending a .exe suffix
+ // on Windows. Don't do this if we somehow have a relative path, because
+ // we don't want to go searching the PATH and accidentally find an unrelated
+ // version of the program.
+ if (Result.isAbsolute()) {
+ Result = sys::Program::FindProgramByName(Result.str());
+ if (!Result.empty())
+ return Result;
+ }
}
return sys::Path();
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 6a70449b56dc..3a95b65e6900 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -221,121 +221,238 @@ const char *Triple::getArchNameForAssembler() {
//
-void Triple::Parse() const {
- assert(!isInitialized() && "Invalid parse call.");
-
- StringRef ArchName = getArchName();
- StringRef VendorName = getVendorName();
- StringRef OSName = getOSName();
-
+Triple::ArchType Triple::ParseArch(StringRef ArchName) {
if (ArchName.size() == 4 && ArchName[0] == 'i' &&
ArchName[2] == '8' && ArchName[3] == '6' &&
ArchName[1] - '3' < 6) // i[3-9]86
- Arch = x86;
+ return x86;
else if (ArchName == "amd64" || ArchName == "x86_64")
- Arch = x86_64;
+ return x86_64;
else if (ArchName == "bfin")
- Arch = bfin;
+ return bfin;
else if (ArchName == "pic16")
- Arch = pic16;
+ return pic16;
else if (ArchName == "powerpc")
- Arch = ppc;
+ return ppc;
else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
- Arch = ppc64;
+ return ppc64;
else if (ArchName == "mblaze")
- Arch = mblaze;
+ return mblaze;
else if (ArchName == "arm" ||
ArchName.startswith("armv") ||
ArchName == "xscale")
- Arch = arm;
+ return arm;
else if (ArchName == "thumb" ||
ArchName.startswith("thumbv"))
- Arch = thumb;
+ return thumb;
else if (ArchName.startswith("alpha"))
- Arch = alpha;
+ return alpha;
else if (ArchName == "spu" || ArchName == "cellspu")
- Arch = cellspu;
+ return cellspu;
else if (ArchName == "msp430")
- Arch = msp430;
+ return msp430;
else if (ArchName == "mips" || ArchName == "mipsallegrex")
- Arch = mips;
+ return mips;
else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
ArchName == "psp")
- Arch = mipsel;
+ return mipsel;
else if (ArchName == "sparc")
- Arch = sparc;
+ return sparc;
else if (ArchName == "sparcv9")
- Arch = sparcv9;
+ return sparcv9;
else if (ArchName == "s390x")
- Arch = systemz;
+ return systemz;
else if (ArchName == "tce")
- Arch = tce;
+ return tce;
else if (ArchName == "xcore")
- Arch = xcore;
+ return xcore;
else
- Arch = UnknownArch;
-
-
- // Handle some exceptional cases where the OS / environment components are
- // stuck into the vendor field.
- if (StringRef(getTriple()).count('-') == 1) {
- StringRef VendorName = getVendorName();
-
- if (VendorName.startswith("mingw32")) { // 'i386-mingw32', etc.
- Vendor = PC;
- OS = MinGW32;
- return;
- }
-
- // arm-elf is another example, but we don't currently parse anything about
- // the environment.
- }
+ return UnknownArch;
+}
+Triple::VendorType Triple::ParseVendor(StringRef VendorName) {
if (VendorName == "apple")
- Vendor = Apple;
+ return Apple;
else if (VendorName == "pc")
- Vendor = PC;
+ return PC;
else
- Vendor = UnknownVendor;
+ return UnknownVendor;
+}
+Triple::OSType Triple::ParseOS(StringRef OSName) {
if (OSName.startswith("auroraux"))
- OS = AuroraUX;
+ return AuroraUX;
else if (OSName.startswith("cygwin"))
- OS = Cygwin;
+ return Cygwin;
else if (OSName.startswith("darwin"))
- OS = Darwin;
+ return Darwin;
else if (OSName.startswith("dragonfly"))
- OS = DragonFly;
+ return DragonFly;
else if (OSName.startswith("freebsd"))
- OS = FreeBSD;
+ return FreeBSD;
else if (OSName.startswith("linux"))
- OS = Linux;
+ return Linux;
else if (OSName.startswith("lv2"))
- OS = Lv2;
+ return Lv2;
else if (OSName.startswith("mingw32"))
- OS = MinGW32;
+ return MinGW32;
else if (OSName.startswith("mingw64"))
- OS = MinGW64;
+ return MinGW64;
else if (OSName.startswith("netbsd"))
- OS = NetBSD;
+ return NetBSD;
else if (OSName.startswith("openbsd"))
- OS = OpenBSD;
+ return OpenBSD;
else if (OSName.startswith("psp"))
- OS = Psp;
+ return Psp;
else if (OSName.startswith("solaris"))
- OS = Solaris;
+ return Solaris;
else if (OSName.startswith("win32"))
- OS = Win32;
+ return Win32;
else if (OSName.startswith("haiku"))
- OS = Haiku;
+ return Haiku;
else if (OSName.startswith("minix"))
- OS = Minix;
+ return Minix;
else
- OS = UnknownOS;
+ return UnknownOS;
+}
+
+void Triple::Parse() const {
+ assert(!isInitialized() && "Invalid parse call.");
+
+ Arch = ParseArch(getArchName());
+ Vendor = ParseVendor(getVendorName());
+ OS = ParseOS(getOSName());
assert(isInitialized() && "Failed to initialize!");
}
+std::string Triple::normalize(StringRef Str) {
+ // Parse into components.
+ SmallVector<StringRef, 4> Components;
+ for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) {
+ Last = Str.find('-', First);
+ Components.push_back(Str.slice(First, Last));
+ }
+
+ // If the first component corresponds to a known architecture, preferentially
+ // use it for the architecture. If the second component corresponds to a
+ // known vendor, preferentially use it for the vendor, etc. This avoids silly
+ // component movement when a component parses as (eg) both a valid arch and a
+ // valid os.
+ ArchType Arch = UnknownArch;
+ if (Components.size() > 0)
+ Arch = ParseArch(Components[0]);
+ VendorType Vendor = UnknownVendor;
+ if (Components.size() > 1)
+ Vendor = ParseVendor(Components[1]);
+ OSType OS = UnknownOS;
+ if (Components.size() > 2)
+ OS = ParseOS(Components[2]);
+
+ // Note which components are already in their final position. These will not
+ // be moved.
+ bool Found[3];
+ Found[0] = Arch != UnknownArch;
+ Found[1] = Vendor != UnknownVendor;
+ Found[2] = OS != UnknownOS;
+
+ // If they are not there already, permute the components into their canonical
+ // positions by seeing if they parse as a valid architecture, and if so moving
+ // the component to the architecture position etc.
+ for (unsigned Pos = 0; Pos != 3; ++Pos) {
+ if (Found[Pos])
+ continue; // Already in the canonical position.
+
+ for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
+ // Do not reparse any components that already matched.
+ if (Idx < 3 && Found[Idx])
+ continue;
+
+ // Does this component parse as valid for the target position?
+ bool Valid = false;
+ StringRef Comp = Components[Idx];
+ switch (Pos) {
+ default:
+ assert(false && "unexpected component type!");
+ case 0:
+ Arch = ParseArch(Comp);
+ Valid = Arch != UnknownArch;
+ break;
+ case 1:
+ Vendor = ParseVendor(Comp);
+ Valid = Vendor != UnknownVendor;
+ break;
+ case 2:
+ OS = ParseOS(Comp);
+ Valid = OS != UnknownOS;
+ break;
+ }
+ if (!Valid)
+ continue; // Nope, try the next component.
+
+ // Move the component to the target position, pushing any non-fixed
+ // components that are in the way to the right. This tends to give
+ // good results in the common cases of a forgotten vendor component
+ // or a wrongly positioned environment.
+ if (Pos < Idx) {
+ // Insert left, pushing the existing components to the right. For
+ // example, a-b-i386 -> i386-a-b when moving i386 to the front.
+ StringRef CurrentComponent(""); // The empty component.
+ // Replace the component we are moving with an empty component.
+ std::swap(CurrentComponent, Components[Idx]);
+ // Insert the component being moved at Pos, displacing any existing
+ // components to the right.
+ for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
+ // Skip over any fixed components.
+ while (i < 3 && Found[i]) ++i;
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ }
+ } else if (Pos > Idx) {
+ // Push right by inserting empty components until the component at Idx
+ // reaches the target position Pos. For example, pc-a -> -pc-a when
+ // moving pc to the second position.
+ do {
+ // Insert one empty component at Idx.
+ StringRef CurrentComponent(""); // The empty component.
+ for (unsigned i = Idx; i < Components.size(); ++i) {
+ // Skip over any fixed components.
+ while (i < 3 && Found[i]) ++i;
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ // If it was placed on top of an empty component then we are done.
+ if (CurrentComponent.empty())
+ break;
+ }
+ // The last component was pushed off the end - append it.
+ if (!CurrentComponent.empty())
+ Components.push_back(CurrentComponent);
+
+ // Advance Idx to the component's new position.
+ while (++Idx < 3 && Found[Idx]) {}
+ } while (Idx < Pos); // Add more until the final position is reached.
+ }
+ assert(Pos < Components.size() && Components[Pos] == Comp &&
+ "Component moved wrong!");
+ Found[Pos] = true;
+ break;
+ }
+ }
+
+ // Special case logic goes here. At this point Arch, Vendor and OS have the
+ // correct values for the computed components.
+
+ // Stick the corrected components back together to form the normalized string.
+ std::string Normalized;
+ for (unsigned i = 0, e = Components.size(); i != e; ++i) {
+ if (i) Normalized += '-';
+ Normalized += Components[i];
+ }
+ return Normalized;
+}
+
StringRef Triple::getArchName() const {
return StringRef(Data).split('-').first; // Isolate first component
}
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 8054ae63688c..dba46df36256 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -19,6 +19,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/System/Signals.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
@@ -56,13 +57,6 @@ raw_ostream::~raw_ostream() {
if (BufferMode == InternalBuffer)
delete [] OutBufStart;
-
- // If there are any pending errors, report them now. Clients wishing
- // to avoid report_fatal_error calls should check for errors with
- // has_error() and clear the error flag with clear_error() before
- // destructing raw_ostream objects which may have errors.
- if (Error)
- report_fatal_error("IO failure on output stream.");
}
// An out of line virtual method to provide a home for the class vtable.
@@ -143,9 +137,10 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) {
}
raw_ostream &raw_ostream::operator<<(long long N) {
- if (N < 0) {
+ if (N < 0) {
*this << '-';
- N = -N;
+ // Avoid undefined behavior on INT64_MIN with a cast.
+ N = -(unsigned long long)N;
}
return this->operator<<(static_cast<unsigned long long>(N));
@@ -368,7 +363,7 @@ void format_object_base::home() {
/// stream should be immediately destroyed; the string will be empty
/// if no error occurred.
raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
- unsigned Flags) : pos(0) {
+ unsigned Flags) : Error(false), pos(0) {
assert(Filename != 0 && "Filename is null");
// Verify that we don't have both "append" and "excl".
assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
@@ -376,14 +371,17 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
ErrorInfo.clear();
- // Handle "-" as stdout.
+ // Handle "-" as stdout. Note that when we do this, we consider ourself
+ // the owner of stdout. This means that we can do things like close the
+ // file descriptor when we're done and set the "binary" flag globally.
if (Filename[0] == '-' && Filename[1] == 0) {
FD = STDOUT_FILENO;
// If user requested binary then put stdout into binary mode if
// possible.
if (Flags & F_Binary)
sys::Program::ChangeStdoutToBinary();
- ShouldClose = false;
+ // Close stdout when we're done, to detect any output errors.
+ ShouldClose = true;
return;
}
@@ -413,14 +411,22 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
}
raw_fd_ostream::~raw_fd_ostream() {
- if (FD < 0) return;
- flush();
- if (ShouldClose)
- while (::close(FD) != 0)
- if (errno != EINTR) {
- error_detected();
- break;
- }
+ if (FD >= 0) {
+ flush();
+ if (ShouldClose)
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ }
+
+ // If there are any pending errors, report them now. Clients wishing
+ // to avoid report_fatal_error calls should check for errors with
+ // has_error() and clear the error flag with clear_error() before
+ // destructing raw_ostream objects which may have errors.
+ if (has_error())
+ report_fatal_error("IO failure on output stream.");
}
@@ -534,30 +540,24 @@ bool raw_fd_ostream::is_displayed() const {
}
//===----------------------------------------------------------------------===//
-// raw_stdout/err_ostream
+// outs(), errs(), nulls()
//===----------------------------------------------------------------------===//
-// Set buffer settings to model stdout and stderr behavior.
-// Set standard error to be unbuffered by default.
-raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {}
-raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
- true) {}
-
-// An out of line virtual method to provide a home for the class vtable.
-void raw_stdout_ostream::handle() {}
-void raw_stderr_ostream::handle() {}
-
/// outs() - This returns a reference to a raw_ostream for standard output.
/// Use it like: outs() << "foo" << "bar";
raw_ostream &llvm::outs() {
- static raw_stdout_ostream S;
+ // Set buffer settings to model stdout behavior.
+ // Delete the file descriptor when the program exists, forcing error
+ // detection. If you don't want this behavior, don't use outs().
+ static raw_fd_ostream S(STDOUT_FILENO, true);
return S;
}
/// errs() - This returns a reference to a raw_ostream for standard error.
/// Use it like: errs() << "foo" << "bar";
raw_ostream &llvm::errs() {
- static raw_stderr_ostream S;
+ // Set standard error to be unbuffered by default.
+ static raw_fd_ostream S(STDERR_FILENO, false, true);
return S;
}
@@ -665,3 +665,34 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
uint64_t raw_null_ostream::current_pos() const {
return 0;
}
+
+//===----------------------------------------------------------------------===//
+// tool_output_file
+//===----------------------------------------------------------------------===//
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+ : Filename(filename), Keep(false) {
+ // Arrange for the file to be deleted if the process is killed.
+ if (Filename != "-")
+ sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+ // Delete the file if the client hasn't told us not to.
+ if (!Keep && Filename != "-")
+ sys::Path(Filename).eraseFromDisk();
+
+ // Ok, the file is successfully written and closed, or deleted. There's no
+ // further need to clean it up on signals.
+ if (Filename != "-")
+ sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Installer(filename),
+ OS(filename, ErrorInfo, Flags) {
+ // If open fails, no cleanup is needed.
+ if (!ErrorInfo.empty())
+ Installer.Keep = true;
+}
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index 6f6890c06c49..660db492d6b9 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -70,6 +70,12 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
if (ErrMsg) *ErrMsg = dlerror();
return true;
}
+#ifdef __CYGWIN__
+ // Cygwin searches symbols only in the main
+ // with the handle of dlopen(NULL, RTLD_GLOBAL).
+ if (Filename == NULL)
+ H = RTLD_DEFAULT;
+#endif
if (OpenedHandles == 0)
OpenedHandles = new std::vector<void *>();
OpenedHandles->push_back(H);
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index 1235257b27e2..4445c667d86e 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -61,7 +61,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
if (memcmp(magic,"!<arch>\n",8) == 0)
return Archive_FileType;
break;
-
+
case '\177':
if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
if (length >= 18 && magic[17] == 0)
@@ -76,11 +76,11 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
break;
case 0xCA:
- if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
magic[3] == char(0xBE)) {
- // This is complicated by an overlap with Java class files.
+ // This is complicated by an overlap with Java class files.
// See the Mach-O section in /usr/share/file/magic for details.
- if (length >= 8 && magic[7] < 43)
+ if (length >= 8 && magic[7] < 43)
// FIXME: Universal Binary of any type.
return Mach_O_DynamicallyLinkedSharedLib_FileType;
}
@@ -89,18 +89,18 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 0xFE:
case 0xCE: {
uint16_t type = 0;
- if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
magic[2] == char(0xFA) && magic[3] == char(0xCE)) {
/* Native endian */
if (length >= 16) type = magic[14] << 8 | magic[15];
- } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
+ } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
magic[2] == char(0xED) && magic[3] == char(0xFE)) {
/* Reverse endian */
if (length >= 14) type = magic[13] << 8 | magic[12];
}
switch (type) {
- default: break;
- case 1: return Mach_O_Object_FileType;
+ default: break;
+ case 1: return Mach_O_Object_FileType;
case 2: return Mach_O_Executable_FileType;
case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
case 4: return Mach_O_Core_FileType;
@@ -219,38 +219,38 @@ static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
"Sep must be a 1-character string literal.");
if (path.empty())
return ".";
-
+
// If the path is all slashes, return a single slash.
// Otherwise, remove all trailing slashes.
-
+
signed pos = static_cast<signed>(path.size()) - 1;
-
+
while (pos >= 0 && path[pos] == Sep[0])
--pos;
-
+
if (pos < 0)
return path[0] == Sep[0] ? Sep : ".";
-
+
// Any slashes left?
signed i = 0;
-
+
while (i < pos && path[i] != Sep[0])
++i;
-
+
if (i == pos) // No slashes? Return "."
return ".";
-
- // There is at least one slash left. Remove all trailing non-slashes.
+
+ // There is at least one slash left. Remove all trailing non-slashes.
while (pos >= 0 && path[pos] != Sep[0])
--pos;
-
+
// Remove any trailing slashes.
while (pos >= 0 && path[pos] == Sep[0])
--pos;
-
+
if (pos < 0)
return path[0] == Sep[0] ? Sep : ".";
-
+
return path.substr(0, pos+1);
}
diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp
index 5faf220eb916..deb04709d829 100644
--- a/lib/System/RWMutex.cpp
+++ b/lib/System/RWMutex.cpp
@@ -71,23 +71,9 @@ RWMutexImpl::RWMutexImpl()
bzero(rwlock, sizeof(pthread_rwlock_t));
#endif
- pthread_rwlockattr_t attr;
-
- // Initialize the rwlock attributes
- int errorcode = pthread_rwlockattr_init(&attr);
- assert(errorcode == 0);
-
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
- // Make it a process local rwlock
- errorcode = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
-#endif
-
// Initialize the rwlock
- errorcode = pthread_rwlock_init(rwlock, &attr);
- assert(errorcode == 0);
-
- // Destroy the attributes
- errorcode = pthread_rwlockattr_destroy(&attr);
+ int errorcode = pthread_rwlock_init(rwlock, NULL);
+ (void)errorcode;
assert(errorcode == 0);
// Assign the data member
diff --git a/lib/System/ThreadLocal.cpp b/lib/System/ThreadLocal.cpp
index e7054b528147..f6a55a1c0b9b 100644
--- a/lib/System/ThreadLocal.cpp
+++ b/lib/System/ThreadLocal.cpp
@@ -27,6 +27,7 @@ ThreadLocalImpl::ThreadLocalImpl() { }
ThreadLocalImpl::~ThreadLocalImpl() { }
void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { data = 0; }
}
#else
@@ -67,6 +68,10 @@ const void* ThreadLocalImpl::getInstance() {
return pthread_getspecific(*key);
}
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
}
#elif defined(LLVM_ON_UNIX)
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index bc104a32a3ae..47e4d1ac3c6b 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -276,20 +276,20 @@ Path::GetCurrentDirectory() {
char pathname[MAXPATHLEN];
if (!getcwd(pathname,MAXPATHLEN)) {
assert (false && "Could not query current working directory.");
- return Path("");
+ return Path();
}
return Path(pathname);
}
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
{
struct stat sb;
- snprintf(buf, PATH_MAX, "%s//%s", dir, bin);
+ snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
if (realpath(buf, ret) == NULL)
return (1);
if (stat(buf, &sb) != 0)
@@ -334,7 +334,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
free(pv);
return (NULL);
}
-#endif // __FreeBSD__
+#endif // __FreeBSD__ || __NetBSD__
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
@@ -350,7 +350,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
if (realpath(exe_path, link_path))
return Path(std::string(link_path));
}
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
@@ -408,7 +408,7 @@ Path::getSuffix() const {
std::string::size_type dot = path.rfind('.');
if (dot == std::string::npos || dot < slash)
- return StringRef("");
+ return StringRef();
else
return StringRef(path).substr(dot + 1);
}
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index 1e74647e5fdc..7b7c43efc786 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -182,6 +182,16 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
return false;
}
+// DontRemoveFileOnSignal - The public API
+void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ SignalsMutex.acquire();
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
+ if (I != FilesToRemove.rend())
+ FilesToRemove.erase(I.base()-1);
+ SignalsMutex.release();
+}
+
/// AddSignalHandler - Add a function to be called when a signal is delivered
/// to the process. The handler can have a cookie passed to it to identify
/// what instance of the handler it is.
@@ -253,3 +263,37 @@ void llvm::sys::PrintStackTraceOnErrorSignal() {
AddSignalHandler(PrintStackTrace, 0);
}
+
+/***/
+
+// On Darwin, raise sends a signal to the main thread instead of the current
+// thread. This has the unfortunate effect that assert() and abort() will end up
+// bypassing our crash recovery attempts. We work around this for anything in
+// the same linkage unit by just defining our own versions of the assert handler
+// and abort.
+
+#ifdef __APPLE__
+
+void __assert_rtn(const char *func,
+ const char *file,
+ int line,
+ const char *expr) {
+ if (func)
+ fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
+ expr, func, file, line);
+ else
+ fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
+ expr, file, line);
+ abort();
+}
+
+#include <signal.h>
+#include <pthread.h>
+
+void abort() {
+ pthread_kill(pthread_self(), SIGABRT);
+ usleep(1000);
+ __builtin_trap();
+}
+
+#endif
diff --git a/lib/System/Unix/ThreadLocal.inc b/lib/System/Unix/ThreadLocal.inc
index 83d554d3077c..6769520a6fb6 100644
--- a/lib/System/Unix/ThreadLocal.inc
+++ b/lib/System/Unix/ThreadLocal.inc
@@ -22,4 +22,5 @@ ThreadLocalImpl::ThreadLocalImpl() { }
ThreadLocalImpl::~ThreadLocalImpl() { }
void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { setInstance(0); }
}
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index 379527d4ebf2..4a6dbd3ddf29 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -400,8 +400,10 @@ PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
for (unsigned i = 0; i < path.length(); ++i)
status.uniqueID += path[i];
- __int64 ft = *reinterpret_cast<__int64*>(&fi.ftLastWriteTime);
- status.modTime.fromWin32Time(ft);
+ ULARGE_INTEGER ui;
+ ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
+ ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
+ status.modTime.fromWin32Time(ui.QuadPart);
status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
fsIsValid = true;
@@ -720,7 +722,7 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
assert(len < 1024 && "Request for magic string too long");
- char* buf = (char*) alloca(1 + len);
+ char* buf = reinterpret_cast<char*>(alloca(len));
HANDLE h = CreateFile(path.c_str(),
GENERIC_READ,
@@ -739,8 +741,7 @@ bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
if (!ret || nRead != len)
return false;
- buf[len] = '\0';
- Magic = buf;
+ Magic = std::string(buf, len);
return true;
}
@@ -777,8 +778,11 @@ Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
}
+ ULARGE_INTEGER ui;
+ ui.QuadPart = si.modTime.toWin32Time();
FILETIME ft;
- (uint64_t&)ft = si.modTime.toWin32Time();
+ ft.dwLowDateTime = ui.LowPart;
+ ft.dwHighDateTime = ui.HighPart;
BOOL ret = SetFileTime(h, NULL, &ft, &ft);
DWORD err = GetLastError();
CloseHandle(h);
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
index d6db71ba4f35..2498a26ea99c 100644
--- a/lib/System/Win32/Signals.inc
+++ b/lib/System/Win32/Signals.inc
@@ -140,6 +140,20 @@ bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
return false;
}
+// DontRemoveFileOnSignal - The public API
+void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ if (FilesToRemove == NULL)
+ return;
+
+ FilesToRemove->push_back(Filename);
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+ if (I != FilesToRemove->rend())
+ FilesToRemove->erase(I.base()-1);
+
+ LeaveCriticalSection(&CriticalSection);
+}
+
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIBABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
void sys::PrintStackTraceOnErrorSignal() {
diff --git a/lib/System/Win32/ThreadLocal.inc b/lib/System/Win32/ThreadLocal.inc
index c8f7840b0038..b8b933c4d29d 100644
--- a/lib/System/Win32/ThreadLocal.inc
+++ b/lib/System/Win32/ThreadLocal.inc
@@ -46,4 +46,8 @@ void ThreadLocalImpl::setInstance(const void* d){
assert(errorcode != 0);
}
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 14825a785649..271ca44c2b69 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -30,22 +30,22 @@ class formatted_raw_ostream;
namespace ARMCC {
// The CondCodes constants map directly to the 4-bit encoding of the
// condition field for predicated instructions.
- enum CondCodes {
- EQ,
- NE,
- HS,
- LO,
- MI,
- PL,
- VS,
- VC,
- HI,
- LS,
- GE,
- LT,
- GT,
- LE,
- AL
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Carry set >, ==, or unordered
+ LO, // Carry clear Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Not unordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Greater than Greater than
+ LE, // Less than or equal <, ==, or unordered
+ AL // Always (unconditional) Always (unconditional)
};
inline static CondCodes getOppositeCondition(CondCodes CC) {
@@ -90,6 +90,33 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+ default: llvm_unreachable("Unknown memory opetion");
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -98,6 +125,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index fa64d6c2a4b4..d6a8f19724dc 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,20 +20,6 @@ include "llvm/Target/Target.td"
// ARM Subtarget features.
//
-def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
- "ARM v4T">;
-def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
- "ARM v5T">;
-def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
- "ARM v5TE, v5TEj, v5TExp">;
-def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
- "ARM v6">;
-def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
- "ARM v6t2">;
-def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
- "ARM v7A">;
-def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
- "ARM v7M">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
"Enable VFP2 instructions">;
def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,14 +28,20 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
"Enable NEON instructions">;
def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
"Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
-def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
// Some processors have multiply-accumulate instructions that don't
// play nicely with other VFP instructions, and it's generally better
@@ -57,14 +49,41 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
// others as well. We should do more benchmarking and confirm one way or
// the other.
-def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
- "Disable VFP MAC instructions">;
+def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
+ "Disable VFP MAC instructions">;
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations.
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
"true",
"Use NEON for single precision FP">;
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+
+// ARM architectures.
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M",
+ "ARM v6m",
+ [FeatureNoARM, FeatureDB]>;
+def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+ "ARM v6t2",
+ [FeatureThumb2]>;
+def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+ "ARM v7A",
+ [FeatureThumb2, FeatureNEON, FeatureDB]>;
+def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+ "ARM v7M",
+ [FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -122,20 +141,23 @@ def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
+
// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
- FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
+ [ArchV7A, FeatureHasSlowVMLx,
+ FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
-def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
-def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+ [ArchV7A, FeatureT2XtPk]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [ArchV7M]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 92a13f1d751c..db481005b3a4 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -458,6 +458,7 @@ namespace ARM_AM {
// IB - Increment before
// DA - Decrement after
// DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
static inline AMSubMode getAM4SubMode(unsigned Mode) {
return (AMSubMode)(Mode & 0x7);
@@ -477,14 +478,6 @@ namespace ARM_AM {
//
// The first operand is always a Reg. The second operand encodes the
// operation in bit 8 and the immediate in bits 0-7.
- //
- // This is also used for FP load/store multiple ops. The second operand
- // encodes the number of registers (or 2 times the number of registers
- // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the
- // following two sub-modes:
- //
- // IA - Increment after
- // DB - Decrement before
/// getAM5Opc - This function encodes the addrmode5 opc field.
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
@@ -498,17 +491,6 @@ namespace ARM_AM {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
- /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
- /// VSTM instructions.
- static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) {
- assert((SubMode == ia || SubMode == db) &&
- "Illegal addressing mode 5 sub-mode!");
- return ((int)SubMode << 8) | Offset;
- }
- static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
- return (AMSubMode)((AM5Opc >> 8) & 0x7);
- }
-
//===--------------------------------------------------------------------===//
// Addressing Mode #6
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 946f4744f5bb..6cfd5961149f 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,7 +17,7 @@
#include "ARMBuildAttrs.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMInstPrinter.h"
+#include "AsmPrinter/ARMInstPrinter.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMMCInstLower.h"
#include "ARMTargetMachine.h"
@@ -47,6 +47,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -56,6 +57,15 @@ static cl::opt<bool>
EnableMCInst("enable-arm-mcinst-printer", cl::Hidden,
cl::desc("enable experimental asmprinter gunk in the arm backend"));
+namespace llvm {
+ namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+ }
+}
+
namespace {
class ARMAsmPrinter : public AsmPrinter {
@@ -80,9 +90,9 @@ namespace {
virtual const char *getPassName() const {
return "ARM Assembly Printer";
}
-
+
void printInstructionThroughMCStreamer(const MachineInstr *MI);
-
+
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
const char *Modifier = 0);
@@ -110,8 +120,12 @@ namespace {
void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O,
const char *Modifier = 0);
- void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum,
- raw_ostream &O);
+ void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
@@ -190,12 +204,32 @@ namespace {
virtual void EmitInstruction(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &F);
-
+
virtual void EmitConstantPool() {} // we emit constant pools customly!
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
const MachineBasicBlock *MBB) const;
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
@@ -208,7 +242,7 @@ namespace {
EmitMachineConstantPoolValue(MCPV, OS);
OutStreamer.EmitRawText(OS.str());
}
-
+
void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV,
raw_ostream &O) {
switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) {
@@ -234,7 +268,7 @@ namespace {
// FIXME: Remove this when Darwin transition to @GOT like syntax.
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
O << *Sym;
-
+
MachineModuleInfoMachO &MMIMachO =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -278,7 +312,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitRawText(OS.str());
}
}
-
+
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -358,7 +392,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
O << *GetExternalSymbolSymbol(MO.getSymbolName());
-
+
if (isCallOp && Subtarget->isTargetELF() &&
TM.getRelocationModel() == Reloc::PIC_)
O << "(PLT)";
@@ -438,15 +472,13 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << " ";
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -575,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -641,6 +663,32 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op,
O << "#" << lsb << ", #" << width;
}
+void
+ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
//===--------------------------------------------------------------------===//
void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op,
@@ -737,12 +785,11 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
@@ -916,12 +963,12 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
-
+
unsigned JTI = MO1.getIndex();
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const char *JTEntryDirective = MAI->getData32bitsDirective();
@@ -958,12 +1005,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
-
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
-
+
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -980,7 +1027,7 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
O << MAI->getData8bitsDirective();
else if (HalfWordOffset)
O << MAI->getData16bitsDirective();
-
+
if (ByteOffset || HalfWordOffset)
O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2";
else
@@ -1086,10 +1133,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstructionThroughMCStreamer(MI);
return;
}
-
+
if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY)
EmitAlignment(2);
-
+
SmallString<128> Str;
raw_svector_ostream OS(Str);
if (MI->getOpcode() == ARM::DBG_VALUE) {
@@ -1112,7 +1159,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstruction(MI, OS);
OutStreamer.EmitRawText(OS.str());
-
+
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction instead.
if (MI->getOpcode() == ARM::t2TBB)
@@ -1129,7 +1176,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// avoid out-of-range branches that are due a fundamental limitation of
// the way symbol offsets are encoded with the current Darwin ARM
// relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
+ const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(
getObjFileLowering());
OutStreamer.SwitchSection(TLOFMacho.getTextSection());
@@ -1148,6 +1195,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
16, SectionKind::getText());
OutStreamer.SwitchSection(sect);
}
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
}
}
@@ -1173,8 +1226,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1");
}
-
- if (FiniteOnlyFPMath())
+
+ if (NoInfsFPMath && NoNaNsFPMath)
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1");
else
@@ -1280,7 +1333,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// LPC0:
// add r0, pc, r0
// This adds the address of LPC0 to r0.
-
+
// Emit the label.
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
@@ -1288,8 +1341,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
+ "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
-
-
+
+
// Form and emit tha dd.
MCInst AddInst;
AddInst.setOpcode(ARM::ADDrr);
@@ -1315,7 +1368,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
EmitGlobalConstant(MCPE.Val.ConstVal);
-
+
return;
}
case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file.
@@ -1325,13 +1378,13 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVi);
TmpInst.addOperand(MCOperand::CreateReg(DstReg));
TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1));
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
@@ -1349,11 +1402,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out
OutStreamer.EmitInstruction(TmpInst);
}
- return;
+ return;
}
case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
// This is a hack that lowers as a two instruction sequence.
@@ -1384,32 +1437,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
TmpInst.setOpcode(ARM::MOVi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(V1); // lower16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVTi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg
TmpInst.addOperand(V2); // upper16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
return;
}
}
-
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 49c16f3e0720..3a8bebe0dd24 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -15,9 +15,9 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
@@ -501,7 +501,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
llvm_unreachable("Unknown or unset size field for instr!");
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
@@ -573,48 +573,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool
-ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- switch (MI.getOpcode()) {
- default: break;
- case ARM::VMOVS:
- case ARM::VMOVD:
- case ARM::VMOVDneon:
- case ARM::VMOVQ:
- case ARM::VMOVQQ : {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- case ARM::MOVr:
- case ARM::MOVr_TC:
- case ARM::tMOVr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2gpr:
- case ARM::t2MOVr: {
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid ARM MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- }
-
- return false;
-}
-
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
@@ -763,8 +721,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Align);
// tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -798,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -818,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -830,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -865,7 +824,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -893,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -910,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -922,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -963,6 +923,11 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
unsigned PCLabelId = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
if (ACPV->isGlobalValue())
NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
ARMCP::CPValue, 4);
@@ -972,6 +937,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
else if (ACPV->isBlockAddress())
NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
@@ -1393,3 +1361,63 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::CMPzri:
+ case ARM::t2CMPri:
+ case ARM::t2CMPzri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so
+/// that we can remove a "comparison with zero".
+bool ARMBaseInstrInfo::
+ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI;
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || !MO.isDef()) continue;
+
+ // This instruction modifies CPSR before the one we want to change. We
+ // can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::ADDri:
+ case ARM::SUBri:
+ case ARM::t2ADDri:
+ case ARM::t2SUBri:
+ MI->RemoveOperand(5);
+ MachineInstrBuilder(MI)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 89a2db74a75e..b4f4a33a70ad 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,11 +15,12 @@
#define ARMBASEINSTRUCTIONINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
/// ARMII - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -97,44 +98,45 @@ namespace ARMII {
// Miscellaneous arithmetic instructions
ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
// Extend instructions
- ExtFrm = 13 << FormShift,
+ ExtFrm = 14 << FormShift,
// VFP formats
- VFPUnaryFrm = 14 << FormShift,
- VFPBinaryFrm = 15 << FormShift,
- VFPConv1Frm = 16 << FormShift,
- VFPConv2Frm = 17 << FormShift,
- VFPConv3Frm = 18 << FormShift,
- VFPConv4Frm = 19 << FormShift,
- VFPConv5Frm = 20 << FormShift,
- VFPLdStFrm = 21 << FormShift,
- VFPLdStMulFrm = 22 << FormShift,
- VFPMiscFrm = 23 << FormShift,
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
// Thumb format
- ThumbFrm = 24 << FormShift,
+ ThumbFrm = 25 << FormShift,
// Miscelleaneous format
- MiscFrm = 25 << FormShift,
+ MiscFrm = 26 << FormShift,
// NEON formats
- NGetLnFrm = 26 << FormShift,
- NSetLnFrm = 27 << FormShift,
- NDupFrm = 28 << FormShift,
- NLdStFrm = 29 << FormShift,
- N1RegModImmFrm= 30 << FormShift,
- N2RegFrm = 31 << FormShift,
- NVCVTFrm = 32 << FormShift,
- NVDupLnFrm = 33 << FormShift,
- N2RegVShLFrm = 34 << FormShift,
- N2RegVShRFrm = 35 << FormShift,
- N3RegFrm = 36 << FormShift,
- N3RegVShFrm = 37 << FormShift,
- NVExtFrm = 38 << FormShift,
- NVMulSLFrm = 39 << FormShift,
- NVTBLFrm = 40 << FormShift,
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
@@ -198,7 +200,7 @@ namespace ARMII {
}
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
- const ARMSubtarget& Subtarget;
+ const ARMSubtarget &Subtarget;
protected:
// Can be only subclassed.
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
@@ -223,7 +225,7 @@ public:
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ bool AllowModify = false) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -262,12 +264,6 @@ public:
///
virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -341,6 +337,17 @@ public:
unsigned NumInstrs) const {
return NumInstrs && NumInstrs == 1;
}
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpValue) const;
+
+ /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
+ MachineInstr *CmpInstr) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 182bd9937145..eceafad63f17 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -40,13 +40,20 @@
#include "llvm/Support/CommandLine.h"
namespace llvm {
-cl::opt<bool>
-ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
- cl::desc("Reuse repeated frame index values"));
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
}
using namespace llvm;
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
if (isSPVFP)
@@ -143,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
TII(tii), STI(sti),
- FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
}
const unsigned*
@@ -176,8 +184,11 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
- if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(ARM::FPSCR);
+ if (hasFP(MF))
Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
Reserved.set(ARM::R9);
@@ -191,9 +202,13 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
case ARM::SP:
case ARM::PC:
return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
case ARM::R7:
case ARM::R11:
- if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ if (FramePtr == Reg && hasFP(MF))
return true;
break;
case ARM::R9:
@@ -510,7 +525,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -539,7 +554,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -609,30 +624,68 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
/// or if frame pointer elimination is disabled.
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+ // Mac OS X requires FP not to be clobbered for backtracing purpose.
+ if (STI.isTargetDarwin())
+ return true;
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255), and thumb1 is positive offsets only.
+ // It's going to be better to use the SP or Base Pointer instead. When there
+ // are variable sized objects, we can't reference off of the SP, so we
+ // reserve a Base Pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, the scavenger will still enable access to work, it just
+ // won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
+
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- !MFI->hasVarSizedObjects());
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
}
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- (MFI->getMaxAlignment() > StackAlign) &&
- !MFI->hasVarSizedObjects());
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
}
bool ARMBaseRegisterInfo::
@@ -668,6 +721,7 @@ static unsigned estimateStackSize(MachineFunction &MF) {
/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -693,7 +747,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
- if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode6:
// Addressing mode 6 (load/store) instructions can't encode an
@@ -710,6 +767,19 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
return Limit;
}
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
void
ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -737,6 +807,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ // Spill the BasePtr if it's used.
+ if (hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(BasePtr);
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
@@ -807,7 +881,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
bool ForceLRSpill = false;
if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
- unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
// Force LR to be spilled if the Thumb function size is > 2048. This enables
// use of BL to implement far jump. If it turns out that it's not needed
// then the branch fix up path will undo it.
@@ -824,13 +898,19 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// slot of the previous FP. Also, if we have variable sized objects in the
// function, stack slot references will often be negative, and some of
// our instructions are positive-offset only, so conservatively consider
- // that case to want a spill slot (or register) as well.
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
bool BigStack =
- (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
- estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects();
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
@@ -848,9 +928,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
ExtraCSSpill = true;
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ if (hasFP(MF)) {
MF.getRegInfo().setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -941,55 +1019,88 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
-unsigned
+unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
return FramePtr;
return ARM::SP;
}
+// Provide a base+offset reference to an FI slot for debug info. It's the
+// same as what we use for resolving the code-gen references for now.
+// FIXME: This can go wrong when references are SP-relative and simple call
+// frames aren't used.
int
ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg,
+ int SPAdj) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
bool isFixed = MFI->isFixedObjectIndex(FI);
FrameReg = ARM::SP;
+ Offset += SPAdj;
if (AFI->isGPRCalleeSavedArea1Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (needsStackRealignment(MF)) {
- // When dynamically realigning the stack, use the frame pointer for
- // parameters, and the stack pointer for locals.
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (needsStackRealignment(MF)) {
assert (hasFP(MF) && "dynamic stack realignment without a FP!");
if (isFixed) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = BasePtr;
}
- } else if (hasFP(MF) && AFI->hasStackFrame()) {
- if (isFixed || MFI->hasVarSizedObjects()) {
- // Use frame pointer to reference fixed objects unless this is a
- // frameless function.
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) && "missing base pointer!");
+ // Use the base register since we have it.
+ FrameReg = BasePtr;
} else if (AFI->isThumb2Function()) {
- // In Thumb2 mode, the negative offset is very limited.
- int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references.
if (FPOffset >= -255 && FPOffset < 0) {
FrameReg = getFrameRegister(MF);
- Offset = FPOffset;
+ return FPOffset;
}
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = getFrameRegister(MF);
+ return FPOffset;
}
}
+ // Use the base pointer if we have one.
+ if (hasBasePointer(MF))
+ FrameReg = BasePtr;
return Offset;
}
-
int
ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
@@ -1024,7 +1135,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
case ARM::R5:
return ARM::R4;
case ARM::R7:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
case ARM::R9:
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
case ARM::R11:
@@ -1113,7 +1225,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
case ARM::R4:
return ARM::R5;
case ARM::R6:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
case ARM::R8:
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
case ARM::R10:
@@ -1220,13 +1333,18 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
// not required, we reserve argument space for call sites in the function
// immediately on entry to the current function. This eliminates the need for
// add/sub sp brackets around call sites. Returns true if the call frame is
// included as part of the stack frame.
bool ARMBaseRegisterInfo::
-hasReservedCallFrame(MachineFunction &MF) const {
+hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -1244,7 +1362,7 @@ hasReservedCallFrame(MachineFunction &MF) const {
// is not sufficient here since we still may reference some objects via SP
// even when FP is available in Thumb2 mode.
bool ARMBaseRegisterInfo::
-canSimplifyCallFramePseudos(MachineFunction &MF) const {
+canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
@@ -1305,10 +1423,258 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign, consider the appropriate instruction
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDR: case ARM::LDRH: case ARM::LDRB:
+ case ARM::STR: case ARM::STRH: case ARM::STRB:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer,
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relating to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal, we want to allocate a virtual base register.
+ return true;
+}
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for
+/// BaseReg to be a pointer to FrameIdx before insertion point I.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg,
+ int FrameIdx, int64_t Offset) const {
+ ARMFunctionInfo *AFI =
+ I->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineInstrBuilder MIB =
+ BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert (Done && "Unable to resolve frame index!");
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative, and i12 supports only positive, so
+ // based on Offset sign, consider the appropriate instruction
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
+
+void
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
@@ -1325,16 +1691,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned FrameReg;
- int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
- if (FrameReg != ARM::SP)
- SPAdj = 0;
- Offset += SPAdj;
+ int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
+ return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
@@ -1346,7 +1709,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
}
if (Done)
- return 0;
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
@@ -1366,10 +1729,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
- if (Value) {
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
- }
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
@@ -1379,10 +1738,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
- if (!ReuseFrameIndexVals)
- ScratchReg = 0;
}
- return ScratchReg;
}
/// Move iterator past the next bunch of callee save load / store ops for
@@ -1494,7 +1850,8 @@ emitPrologue(MachineFunction &MF) const {
// Otherwise, if this is not Darwin, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
@@ -1513,7 +1870,7 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (HasFP)
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -1525,18 +1882,22 @@ emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP)
+ AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
- // If we need dynamic stack realignment, do it here.
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
if (needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
@@ -1562,7 +1923,28 @@ emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4, RegState::Kill);
}
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr)
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr)
+ .addReg(ARM::SP);
}
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp.
+ if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -1617,34 +1999,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- bool HasFP = hasFP(MF);
- if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (HasFP ||
- AFI->getGPRCalleeSavedArea2Size() ||
- AFI->getDPRCalleeSavedAreaSize() ||
- AFI->getDPRCalleeSavedAreaOffset()) {
- if (NumBytes) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- } else {
- // Thumb2 or ARM.
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- }
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -1670,7 +2043,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
+ BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
@@ -1685,7 +2058,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
} else if (RetOpcode == ARM::TCRETURNriND) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
addReg(JumpTarget.getReg(), RegState::Kill);
- }
+ }
MachineInstr *NewMI = prior(MBBI);
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index f7ee0d5cc66d..fa2eb6c10498 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -44,7 +44,7 @@ static inline bool isARMLowRegister(unsigned Reg) {
}
}
-struct ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
const ARMBaseInstrInfo &TII;
const ARMSubtarget &STI;
@@ -52,6 +52,11 @@ protected:
/// FramePtr - ARM physical register used as frame ptr.
unsigned FramePtr;
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a 3rd base, not just SP and FP, due to
+ /// variable size stack objects.
+ unsigned BasePtr;
+
// Can be only subclassed.
explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &STI);
@@ -102,9 +107,18 @@ public:
MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
bool cannotEliminateFrame(const MachineFunction &MF) const;
@@ -116,6 +130,8 @@ public:
unsigned getFrameRegister(const MachineFunction &MF) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, int SPAdj) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
// Exception handling queries.
@@ -144,16 +160,17 @@ public:
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
- virtual bool hasReservedCallFrame(MachineFunction &MF) const;
- virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+ virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 8fdb07f81626..293e32aa5376 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===//
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -68,7 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
- CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>,
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[f64], CCAssignToStack<8, 8>>,
CCIfType<[v2f64], CCAssignToStack<16, 8>>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 7895cb071922..b1a702f90cfc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -65,7 +65,7 @@ namespace {
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), JTI(0),
+ : MachineFunctionPass(ID), JTI(0),
II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
@@ -124,6 +124,8 @@ namespace {
void emitMiscArithInstruction(const MachineInstr &MI);
+ void emitSaturateInstruction(const MachineInstr &MI);
+
void emitBranchInstruction(const MachineInstr &MI);
void emitInlineJumpTable(unsigned JTIndex);
@@ -389,6 +391,9 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::ArithMiscFrm:
emitMiscArithInstruction(MI);
break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
case ARMII::BrFrm:
emitBranchInstruction(MI);
break;
@@ -654,6 +659,19 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX:
+ case ARM::BMOVPCRX:
+ case ARM::BXr9:
+ case ARM::BMOVPCRXr9: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
case TargetOpcode::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -662,7 +680,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
}
break;
}
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
@@ -1209,12 +1227,58 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
// Encode shift_imm.
unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (TID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
Binary |= ShiftAmt << ARMII::ShiftShift;
emitWordLE(Binary);
}
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ TID.Opcode != ARM::SSAT16 &&
+ TID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (TID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
+
void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
@@ -1485,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// Set addressing mode by modifying bits U(23) and P(24)
const MachineOperand &MO = MI.getOperand(OpIdx++);
- Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
// Set bit W(21)
if (IsUpdating)
@@ -1494,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// First register is encoded in Dd.
Binary |= encodeVFPRd(MI, OpIdx+2);
- // Number of registers are encoded in offset field.
+ // Count the number of registers.
unsigned NumRegs = 1;
for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 65a3da6f1617..60e923bd2c38 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -18,9 +18,9 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -165,7 +165,7 @@ namespace {
/// HasInlineAsm - True if the function contains inline assembly.
bool HasInlineAsm;
- const TargetInstrInfo *TII;
+ const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
@@ -173,7 +173,7 @@ namespace {
bool isThumb2;
public:
static char ID;
- ARMConstantIslands() : MachineFunctionPass(&ID) {}
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -272,7 +272,7 @@ FunctionPass *llvm::createARMConstantIslandPass() {
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
MachineConstantPool &MCP = *MF.getConstantPool();
- TII = MF.getTarget().getInstrInfo();
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
@@ -323,6 +323,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// constant pool users.
InitialFunctionScan(MF, CPEMIs);
CPEMIs.clear();
+ DEBUG(dumpBBs());
+
/// Remove dead constant pool entries.
RemoveUnusedCPEntries();
@@ -355,7 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
// Shrink 32-bit Thumb2 branch, load, and store instructions.
- if (isThumb2)
+ if (isThumb2 && !STI->prefers32BitThumb())
MadeChange |= OptimizeThumb2Instructions(MF);
// After a while, this might be made debug-only, but it is not expensive.
@@ -366,6 +368,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
+ DEBUG(errs() << '\n'; dumpBBs());
+
BBSizes.clear();
BBOffsets.clear();
WaterList.clear();
@@ -509,6 +513,10 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
case ARM::tBR_JTr:
// A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
MF.EnsureAlignment(2U);
if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
// FIXME: Add a pseudo ALIGN instruction instead.
@@ -768,28 +776,54 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- // Figure out how large the first NewMBB is. (It cannot
- // contain a constpool_entry or tablejump.)
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
-
unsigned OrigBBI = OrigBB->getNumber();
unsigned NewBBI = NewBB->getNumber();
- // Set the size of NewBB in BBSizes.
- BBSizes[NewBBI] = NewBBSize;
- // We removed instructions from UserMBB, subtract that off from its size.
- // Add 2 or 4 to the block to count the unconditional branch we added to it.
int delta = isThumb1 ? 2 : 4;
- BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
// ...and adjust BBOffsets for NewBB accordingly.
BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
// All BBOffsets following these blocks must be modified.
- AdjustBBOffsetsAfter(NewBB, delta);
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
return NewBB;
}
@@ -915,6 +949,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
}
// Thumb1 jump tables require padding. They should be at the end;
// following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
MachineInstr *ThumbJTMI = prior(MBB->end());
if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
@@ -1143,11 +1181,13 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
MachineBasicBlock::iterator MI = UserMI;
++MI;
unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
U.MaxDisp, U.NegOk, U.IsSoImm)) {
@@ -1159,9 +1199,23 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
CPUIndex++;
}
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
}
+
DEBUG(errs() << "Split in middle of big block\n");
- NewMBB = SplitBlockBeforeInstr(prior(MI));
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
}
}
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9c62597b4323..fc2e3c3fadae 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -19,14 +19,21 @@
#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
+ // Constants for register spacing in NEON load/store instructions.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
public:
static char ID;
- ARMExpandPseudo() : MachineFunctionPass(&ID) {}
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -41,6 +48,10 @@ namespace {
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
};
char ARMExpandPseudo::ID = 0;
}
@@ -63,6 +74,129 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
}
}
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
+ }
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (hasWriteBack) {
+ bool WBIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned WBReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ // For an instruction writing the odd subregs, add an implicit use of the
+ // super-register because the even subregs were loaded separately.
+ if (RegSpc == OddDblSpc)
+ MIB.addReg(DstReg, RegState::Implicit);
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ MI.eraseFromParent();
+}
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+ if (hasWriteBack) {
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
+ }
+
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ if (SrcIsKill)
+ // Add an implicit kill for the super-reg.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ MI.eraseFromParent();
+}
+
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -71,9 +205,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ bool ModifiedOp = true;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: break;
+ default:
+ ModifiedOp = false;
+ break;
+
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@@ -92,10 +230,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
- Modified = true;
break;
}
+ case ARM::MOVi32imm:
case ARM::t2MOVi32imm: {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
@@ -104,9 +242,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
const MachineOperand &MO = MI.getOperand(1);
MachineInstrBuilder LO16, HI16;
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVi16 : ARM::t2MOVi16),
DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVTi16 : ARM::t2MOVTi16))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(DstReg);
@@ -128,7 +270,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
HI16.addImm(Pred).addReg(PredReg);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
- Modified = true;
break;
}
@@ -155,9 +296,211 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
- Modified = true;
}
+
+ case ARM::VLD1q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
+ case ARM::VLD1q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
+
+ case ARM::VLD2d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
+ case ARM::VLD2d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
+
+ case ARM::VLD3d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
+ case ARM::VLD3d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VLD4d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
+ case ARM::VLD4d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
+
+ case ARM::VST1q8Pseudo:
+ ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo:
+ ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo:
+ ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo:
+ ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
+ case ARM::VST1q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
+
+ case ARM::VST2d8Pseudo:
+ ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo:
+ ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo:
+ ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo:
+ ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo:
+ ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo:
+ ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
+ case ARM::VST2d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
+
+ case ARM::VST3d8Pseudo:
+ ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo:
+ ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo:
+ ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo:
+ ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
+ case ARM::VST3d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VST4d8Pseudo:
+ ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo:
+ ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo:
+ ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo:
+ ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
+ case ARM::VST4d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
}
+
+ if (ModifiedOp)
+ Modified = true;
MBBI = NMBBI;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 000000000000..4892eae95833
--- /dev/null
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,665 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARMFastISel("arm-fast-isel",
+ cl::desc("Turn on experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ const ARMFunctionInfo *AFI;
+
+ // Convenience variable to avoid checking all the time.
+ bool isThumb;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ virtual bool ARMSelectLoad(const Instruction *I);
+ virtual bool ARMSelectStore(const Instruction *I);
+ virtual bool ARMSelectBranch(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, EVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, EVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
+ bool ARMLoadAlloca(const Instruction *I);
+ bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg);
+ bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
+ bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg);
+
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+};
+
+} // end anonymous namespace
+
+// #include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+// If the machine is predicable go ahead and add the predicate operands, if
+// it needs default CC operands add those.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate?
+ if (TII.isPredicable(MI))
+ AddDefaultPred(MIB);
+
+ // Do we optionally set a predicate? Preds is size > 0 iff the predicate
+ // defines CPSR. All other OptionalDefines in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ // TODO: This should be safe for fp because they're just bits from the
+ // Constant.
+ // TODO: Theoretically we could materialize fp constants with instructions
+ // from VFP3.
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ // Different addressing modes between ARM/Thumb2 for constant pool loads.
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci))
+ .addReg(DestReg).addConstantPoolIndex(Idx));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp))
+ .addReg(DestReg).addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0));
+
+ return DestReg;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
+ VT = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (VT == MVT::Other || !VT.isSimple()) return false;
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type than can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the Reg+Offset to get to an object.
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
+ int &Offset) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
+ return false;
+
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ //errs() << "Failing Opcode is: " << *Op1 << "\n";
+ break;
+ case Instruction::Alloca: {
+ assert(false && "Alloca should have been handled earlier!");
+ return false;
+ }
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ //errs() << "Failing GV is: " << GV << "\n";
+ (void)GV;
+ return false;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ Reg = getRegForValue(Obj);
+ if (Reg == 0) return false;
+
+ // Since the offset may be too large for the load instruction
+ // get the reg+offset into a register.
+ // TODO: Verify the additions work, otherwise we'll need to add the
+ // offset instead of 0 to the instructions and do all sorts of operand
+ // munging.
+ // TODO: Optimize this somewhat.
+ if (Offset != 0) {
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned PredReg = 0;
+
+ if (!isThumb)
+ emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::ARMLoadAlloca(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ ResultReg, SI->second, RC,
+ TM.getRegisterInfo());
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
+ unsigned Reg, int Offset) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Trying to emit for an unhandled type!");
+ return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
+ VT = MVT::i32;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
+ VT = MVT::i32;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::tLDR : ARM::LDR;
+ break;
+ }
+
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // TODO: Fix the Addressing modes so that these can share some code.
+ // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addImm(Offset).addReg(0));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) {
+ Value *Op1 = I->getOperand(1);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ assert(SrcReg != 0 && "Nothing to store!");
+ TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ SrcReg, true /*isKill*/, SI->second, RC,
+ TM.getRegisterInfo());
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
+ unsigned DstReg, int Offset) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
+ case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
+ case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc), SrcReg)
+ .addReg(DstReg).addImm(Offset).addReg(0));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc), SrcReg)
+ .addReg(DstReg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMSelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Yay type legalization
+ EVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // If we're an alloca we know we have a frame index and can emit the store
+ // quickly.
+ if (ARMStoreAlloca(I, SrcReg))
+ return true;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
+ return false;
+
+ if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
+
+ return false;
+
+}
+
+bool ARMFastISel::ARMSelectLoad(const Instruction *I) {
+ // If we're an alloca we know we have a frame index and can emit the load
+ // directly in short order.
+ if (ARMLoadAlloca(I))
+ return true;
+
+ // Verify we have a legal type before going any further.
+ EVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+ return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (CondReg == 0) return false;
+
+ unsigned CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+ unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(CondReg).addReg(CondReg));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+ // No Thumb-1 for now.
+ if (isThumb && !AFI->isThumb2Function()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return ARMSelectLoad(I);
+ case Instruction::Store:
+ return ARMSelectStore(I);
+ case Instruction::Br:
+ return ARMSelectBranch(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 000000000000..85b0c6c248d0
--- /dev/null
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,212 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into a biggest one can be addressed using offsets
+// from the same base pointer (no need for separate base pointer for each of the
+// global). Such a transformation can significantly reduce the register pressure
+// when many globals are involved.
+//
+// For example, consider the code which touches several global variables at once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of 3 arrays should be kept in the registers, thus
+// this code has quite large register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// Pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// note that we saved 2 registers here almostly "for free".
+// ===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+ class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function& F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td):
+ TD(td) { }
+
+ bool operator() (const GlobalVariable* GV1,
+ const GlobalVariable* GV2) {
+ const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<const Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; MergedSize < MaxOffset && j != e; ++j) {
+ const Type* Ty = Globals[j]->getType()->getElementType();
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ MergedSize += TD->getTypeAllocSize(Ty);
+ }
+
+ StructType* MergedTy = StructType::get(M.getContext(), Tys);
+ Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "merged");
+ for (size_t k = i; k < j; ++k) {
+ SmallVector<Constant*, 2> Idx;
+ Idx.push_back(ConstantInt::get(Int32Ty, 0));
+ Idx.push_back(ConstantInt::get(Int32Ty, k-i));
+
+ Constant* GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedGV,
+ &Idx[0], Idx.size());
+
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool ARMGlobalMerge::doInitialization(Module& M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ if (I->getAlignment() != 0)
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
+ if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+ // FIXME: This currently breaks the EH processing due to way how the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function& F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c84d3ff81324..51a30c158dd1 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -36,6 +36,11 @@
using namespace llvm;
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -113,6 +118,16 @@ public:
bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
+ inline bool Pred_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ }
+
+ inline bool Pred_t2_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+ }
+
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
@@ -220,6 +235,9 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -463,7 +481,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
SDValue &Addr, SDValue &Mode) {
Addr = N;
- Mode = CurDAG->getTargetConstant(0, MVT::i32);
+ Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
return true;
}
@@ -666,6 +684,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
SDValue &BaseReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -1090,110 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
break;
}
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(NumVecs, VT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (NumVecs < 2)
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
return VLd;
- SDValue RegSeq;
- SDValue V0 = SDValue(VLd, 0);
- SDValue V1 = SDValue(VLd, 1);
-
- // Form a REG_SEQUENCE to force register allocation.
- if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- else {
- SDValue V2 = SDValue(VLd, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLd, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
+ SuperReg = SDValue(VLd, 0);
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
- dl, VT, RegSeq);
+ dl, VT, SuperReg);
ReplaceUses(SDValue(N, Vec), D);
}
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
return NULL;
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
// Quad registers are directly supported for VLD1 and VLD2,
// loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(2 * NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 2 * NumVecs);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
+ return VLd;
+
+ SuperReg = SDValue(VLd, 0);
+ Chain = SDValue(VLd, 1);
- // Combine the even and odd subregs to produce the result.
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MemAddr.getValueType());
- ResTys.push_back(MVT::Other);
+ EVT AddrTy = MemAddr.getValueType();
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
- Chain = SDValue(VLdA, NumVecs+1);
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
- Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
- Chain = SDValue(VLdB, NumVecs+1);
-
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
-
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
+ const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
+ Pred, Reg0, Chain };
+ SDNode *VLdB =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
+ SuperReg = SDValue(VLdB, 0);
+ Chain = SDValue(VLdB, 2);
+ }
+
+ // Extract out the Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, SuperReg);
+ ReplaceUses(SDValue(N, Vec), Q);
}
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
@@ -1235,12 +1225,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (is64BitVector) {
- if (NumVecs >= 2) {
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1257,111 +1249,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(RegSeq);
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2,
- // storing pairs of D regs.
+ // Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (NumVecs == 2) {
- // First extract the pair of Q registers.
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ // Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
-
- // Form a QQ register.
- SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
- QQ));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3)));
- }
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
+ Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(3+3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
+ Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
+ Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1675,7 +1617,7 @@ SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ if (Pred_t2_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1692,7 +1634,7 @@ SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_so_imm(TrueVal.getNode())) {
+ if (Pred_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1740,7 +1682,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
}
// Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
// (imm:i32):$cc)
// Emits: (MOVCCi:i32 GPR:i32:$false,
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
@@ -2013,43 +1955,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
-
- // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
- MVT::v2f64, MVT::Other, Ops, 5);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
- // Other cases are autogenerated.
- break;
- }
- case ISD::STORE: {
- // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
- AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
// Other cases are autogenerated.
break;
}
@@ -2206,39 +2111,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
- ARM::VLD1q32, ARM::VLD1q64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD1q64 };
- unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD1d64T };
- unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
- ARM::VLD3q16_UPD,
- ARM::VLD3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
- ARM::VLD3q16odd_UPD,
- ARM::VLD3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD1d64Q };
- unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
- ARM::VLD4q16_UPD,
- ARM::VLD4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
- ARM::VLD4q16odd_UPD,
- ARM::VLD4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -2266,39 +2172,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST1d64T };
- unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
- ARM::VST3q16_UPD,
- ARM::VST3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
- ARM::VST3q16odd_UPD,
- ARM::VST3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST1d64Q };
- unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
- ARM::VST4q16_UPD,
- ARM::VST4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
- ARM::VST4q16odd_UPD,
- ARM::VST4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0091df753eb7..ce4a2c90689c 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -55,7 +55,14 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(true));
+ cl::init(false));
+
+// This option should go away when Machine LICM is smart enough to hoist a
+// reg-to-reg VDUP.
+static cl::opt<bool>
+EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
+ cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
+ cl::init(false));
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
@@ -122,7 +129,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
}
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -166,6 +176,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@@ -264,7 +275,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
@@ -310,9 +322,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
@@ -410,12 +427,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
- // use the default expansion.
- bool canHandleAtomics =
- (Subtarget->hasV7Ops() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
- if (canHandleAtomics) {
+ // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
+ // the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -466,10 +481,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -481,9 +498,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
@@ -530,6 +547,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
+ if (Subtarget->hasV6T2Ops())
+ setTargetDAGCombine(ISD::OR);
+
setStackPointerRegisterToSaveRestore(ARM::SP);
if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
@@ -547,6 +567,37 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
}
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ // Use DPR as representative register class for all floating point
+ // and vector types. Since there are 32 SPR registers and 32 DPR registers so
+ // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
@@ -561,6 +612,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::AND: return "ARMISD::AND";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
@@ -635,9 +687,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
}
}
@@ -656,11 +711,23 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
@@ -688,6 +755,24 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
}
+unsigned
+ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return RegInfo->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
@@ -793,8 +878,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCState &State, bool CanFail) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
@@ -812,6 +898,10 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
if (HiRegList[i] == Reg)
break;
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
LocVT, LocInfo));
@@ -1624,6 +1714,10 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1917,17 +2011,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- // v6 and v7 can both handle barriers directly, but need handled a bit
- // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
+ // Some subtargets which have dmb and dsb instructions can handle barriers
+ // directly. Some ARMv6 cpus can support them with the help of mcr
+ // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should
// never get here.
unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
- if (Subtarget->hasV7Ops())
+ if (Subtarget->hasDataBarrier())
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
- else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ else {
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
- assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return SDValue();
+ }
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -1945,54 +2041,6 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- SDNode *Node = Op.getNode();
- DebugLoc dl = Node->getDebugLoc();
- EVT VT = Node->getValueType(0);
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- SDValue Align = Op.getOperand(2);
-
- // Chain the dynamic stack allocation so that it doesn't modify the stack
- // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
-
- unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
- if (AlignVal > StackAlign)
- // Do this now since selection pass cannot introduce new target
- // independent node.
- Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
-
- // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
- // using a "add r, sp, r" instead. Negate the size now so we don't have to
- // do even more horrible hack later.
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (AFI->isThumb1OnlyFunction()) {
- bool Negate = true;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
- if (C) {
- uint32_t Val = C->getZExtValue();
- if (Val <= 508 && ((Val & 3) == 0))
- Negate = false;
- }
- if (Negate)
- Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
- }
-
- SDVTList VTList = DAG.getVTList(VT, MVT::Other);
- SDValue Ops1[] = { Chain, Size, Align };
- SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
- Chain = Res.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
- DAG.getIntPtrConstant(0, true), SDValue());
- SDValue Ops2[] = { Res, Chain };
- return DAG.getMergeValues(Ops2, 2, dl);
-}
-
-SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -2229,28 +2277,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalICmpImmediate(C-1)) {
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalICmpImmediate(C-1)) {
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalICmpImmediate(C+1)) {
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
@@ -2287,6 +2335,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Cond.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = Cond.getOperand(4);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -2403,8 +2497,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
bool SeenZero = false;
if (canChangeToInt(LHS, SeenZero, Subtarget) &&
canChangeToInt(RHS, SeenZero, Subtarget) &&
- // If one of the operand is zero, it's safe to ignore the NaN case.
- (FiniteOnlyFPMath() || SeenZero)) {
+ // If one of the operand is zero, it's safe to ignore the NaN case since
+ // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
// If unsafe fp math optimization is enabled and there are no othter uses of
// the CMP operands, and the condition code is EQ oe NE, we can optimize it
// to an integer comparison.
@@ -2587,7 +2682,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
@@ -2730,6 +2825,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
+ // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
+ // so that the shift + and get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -3046,6 +3159,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3061,6 +3179,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
ReverseVEXT = true;
}
+ if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
@@ -3086,13 +3205,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
- if ((unsigned) M[i] !=
- (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
@@ -3108,8 +3230,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + NumElts + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
return false;
}
return true;
@@ -3127,8 +3249,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
return false;
}
return true;
@@ -3143,6 +3265,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != 2 * i + WhichResult)
return false;
}
@@ -3168,7 +3291,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
- if ((unsigned) M[i + j * Half] != Idx)
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
@@ -3191,8 +3315,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx + NumElts)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
return false;
Idx += 1;
}
@@ -3217,8 +3341,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
return false;
Idx += 1;
}
@@ -3230,9 +3354,30 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -3292,15 +3437,41 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (isOnlyLowElement)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
- // If all elements are constants, fall back to the default expansion, which
- // will generate a load from the constant pool.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ if (EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
+ NumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerBUILD_VECTOR(Val, DAG, ST));
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
if (isConstant)
return SDValue();
- // Use VDUP for non-constant splats.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (!EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats.
+ if (usesOnlyOneValue && EltSize <= 32)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ }
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
@@ -3585,6 +3756,51 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
+/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
+/// an extending load, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLs;
+ } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLu;
+ } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ } else {
+ // Other vector multiplications are legal.
+ return Op;
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op1 = SkipExtension(N1, DAG);
+
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -3594,10 +3810,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
@@ -3621,10 +3837,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
- case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
}
return SDValue();
}
@@ -4002,78 +4220,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
-
- case ARM::tANDsp:
- case ARM::tADDspr_:
- case ARM::tSUBspi_:
- case ARM::t2SUBrSPi_:
- case ARM::t2SUBrSPi12_:
- case ARM::t2SUBrSPs_: {
- MachineFunction *MF = BB->getParent();
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool DstIsDead = MI->getOperand(0).isDead();
- bool SrcIsKill = MI->getOperand(1).isKill();
-
- if (SrcReg != ARM::SP) {
- // Copy the source to SP from virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
- }
-
- unsigned OpOpc = 0;
- bool NeedPred = false, NeedCC = false, NeedOp3 = false;
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("Unexpected pseudo instruction!");
- case ARM::tANDsp:
- OpOpc = ARM::tAND;
- NeedPred = true;
- break;
- case ARM::tADDspr_:
- OpOpc = ARM::tADDspr;
- break;
- case ARM::tSUBspi_:
- OpOpc = ARM::tSUBspi;
- break;
- case ARM::t2SUBrSPi_:
- OpOpc = ARM::t2SUBrSPi;
- NeedPred = true; NeedCC = true;
- break;
- case ARM::t2SUBrSPi12_:
- OpOpc = ARM::t2SUBrSPi12;
- NeedPred = true;
- break;
- case ARM::t2SUBrSPs_:
- OpOpc = ARM::t2SUBrSPs;
- NeedPred = true; NeedCC = true; NeedOp3 = true;
- break;
- }
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
- if (OpOpc == ARM::tAND)
- AddDefaultT1CC(MIB);
- MIB.addReg(ARM::SP);
- MIB.addOperand(MI->getOperand(2));
- if (NeedOp3)
- MIB.addOperand(MI->getOperand(3));
- if (NeedPred)
- AddDefaultPred(MIB);
- if (NeedCC)
- AddDefaultCC(MIB);
-
- // Copy the result from SP to virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(ARM::SP);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
- }
}
}
@@ -4141,30 +4287,42 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
return SDValue();
}
-/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
-static SDValue PerformADDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
-
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI) {
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
- if (Result.getNode()) return Result;
- }
-
return SDValue();
}
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI);
+}
+
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
static SDValue PerformSUBCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
@@ -4231,6 +4389,105 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask = C->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ if ((C = dyn_cast<ConstantSDNode>(N1))) {
+ unsigned Val = C->getZExtValue();
+ if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
+ return SDValue();
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask2 = C->getZExtValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ ARM::isBitFieldInvertedMask(~Mask2) &&
+ (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned lsb = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ ARM::isBitFieldInvertedMask(Mask2) &&
+ (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+ }
+
+ return SDValue();
+}
+
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4561,7 +4818,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
// If the target supports NEON, try to use vmax/vmin instructions for f32
- // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
// be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
// a NaN; only do the transformation when it matches that behavior.
@@ -4648,6 +4905,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
@@ -5379,6 +5637,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) {
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return 0;
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return 0;
+ }
+ return 1;
+}
+
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 128b72e1e743..ba9ea7f15e7b 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,6 +17,8 @@
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include <vector>
@@ -45,6 +47,8 @@ namespace llvm {
PIC_ADD, // Add with a PC operand and a PIC label.
+ AND, // ARM "and" instruction that sets the 's' flag in CPSR.
+
CMP, // ARM compare instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
@@ -80,7 +84,7 @@ namespace llvm {
MEMBARRIER, // Memory barrier
SYNCBARRIER, // Memory sync barrier
-
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -141,6 +145,10 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -150,7 +158,10 @@ namespace llvm {
// Floating-point max and min:
FMAX,
- FMIN
+ FMIN,
+
+ // Bit-field insert
+ BFI
};
}
@@ -162,6 +173,7 @@ namespace llvm {
/// returns -1.
int getVFPf32Imm(const APFloat &FPImm);
int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
}
//===--------------------------------------------------------------------===//
@@ -171,6 +183,8 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
+ virtual unsigned getJumpTableEncoding(void) const;
+
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -255,8 +269,19 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
Sched::Preference getSchedulingPreference(SDNode *N) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -265,11 +290,17 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
@@ -310,14 +341,15 @@ namespace llvm {
SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -377,6 +409,10 @@ namespace llvm {
unsigned BinOpcode) const;
};
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
}
#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index ac568e75ccc4..113cfffe61f9 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -36,37 +36,38 @@ def LdStMulFrm : Format<10>;
def LdStExFrm : Format<11>;
def ArithMiscFrm : Format<12>;
-def ExtFrm : Format<13>;
-
-def VFPUnaryFrm : Format<14>;
-def VFPBinaryFrm : Format<15>;
-def VFPConv1Frm : Format<16>;
-def VFPConv2Frm : Format<17>;
-def VFPConv3Frm : Format<18>;
-def VFPConv4Frm : Format<19>;
-def VFPConv5Frm : Format<20>;
-def VFPLdStFrm : Format<21>;
-def VFPLdStMulFrm : Format<22>;
-def VFPMiscFrm : Format<23>;
-
-def ThumbFrm : Format<24>;
-def MiscFrm : Format<25>;
-
-def NGetLnFrm : Format<26>;
-def NSetLnFrm : Format<27>;
-def NDupFrm : Format<28>;
-def NLdStFrm : Format<29>;
-def N1RegModImmFrm: Format<30>;
-def N2RegFrm : Format<31>;
-def NVCVTFrm : Format<32>;
-def NVDupLnFrm : Format<33>;
-def N2RegVShLFrm : Format<34>;
-def N2RegVShRFrm : Format<35>;
-def N3RegFrm : Format<36>;
-def N3RegVShFrm : Format<37>;
-def NVExtFrm : Format<38>;
-def NVMulSLFrm : Format<39>;
-def NVTBLFrm : Format<40>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
// Misc flags.
@@ -87,21 +88,21 @@ class Xform16Bit { bit canXformTo16Bit = 1; }
class AddrMode<bits<4> val> {
bits<4> Value = val;
}
-def AddrModeNone : AddrMode<0>;
-def AddrMode1 : AddrMode<1>;
-def AddrMode2 : AddrMode<2>;
-def AddrMode3 : AddrMode<3>;
-def AddrMode4 : AddrMode<4>;
-def AddrMode5 : AddrMode<5>;
-def AddrMode6 : AddrMode<6>;
-def AddrModeT1_1 : AddrMode<7>;
-def AddrModeT1_2 : AddrMode<8>;
-def AddrModeT1_4 : AddrMode<9>;
-def AddrModeT1_s : AddrMode<10>;
-def AddrModeT2_i12: AddrMode<11>;
-def AddrModeT2_i8 : AddrMode<12>;
-def AddrModeT2_so : AddrMode<13>;
-def AddrModeT2_pc : AddrMode<14>;
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
// Instruction size.
@@ -137,11 +138,17 @@ def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
// ARM special operands.
//
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
(ops (i32 14), (i32 zero_reg))> {
let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
}
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
@@ -240,6 +247,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
+
// A few are not predicable
class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
@@ -254,9 +262,9 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsARM];
}
-// Same as I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
@@ -313,7 +321,7 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
}
class ABXIx2<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin,
+ : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin,
asm, "", pattern>;
// BR_JT instructions
@@ -322,16 +330,14 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-
// Atomic load/store instructions
-
class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 1;
+ let Inst{20} = 1;
let Inst{11-0} = 0b111110011111;
}
class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -340,7 +346,7 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 0;
+ let Inst{20} = 0;
let Inst{11-4} = 0b11111001;
}
@@ -350,21 +356,21 @@ class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
: I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -377,7 +383,7 @@ class AI2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// loads
@@ -389,7 +395,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -399,7 +405,7 @@ class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -409,7 +415,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -419,7 +425,7 @@ class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// stores
@@ -431,7 +437,7 @@ class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -441,7 +447,7 @@ class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -451,7 +457,7 @@ class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -461,7 +467,7 @@ class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed loads
@@ -473,7 +479,7 @@ class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -483,7 +489,7 @@ class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed stores
@@ -495,7 +501,7 @@ class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -505,7 +511,7 @@ class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed loads
@@ -517,7 +523,7 @@ class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -527,7 +533,7 @@ class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed stores
@@ -539,7 +545,7 @@ class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -549,7 +555,7 @@ class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// addrmode3 instructions
@@ -977,7 +983,7 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
- let Inst{12} = opcod3;
+ let Inst{12} = opcod3;
}
// BR_JT instructions
@@ -1099,13 +1105,13 @@ class T1Special<bits<4> opcode> : Encoding16 {
// A6.2.4 Load/store single data item encoding.
class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
let Inst{15-12} = opA;
- let Inst{11-9} = opB;
+ let Inst{11-9} = opB;
}
-class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
+class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
-class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
// A6.2.5 Miscellaneous 16-bit instructions encoding.
class T1Misc<bits<7> opcode> : Encoding16 {
@@ -1125,9 +1131,10 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
-// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1185,11 +1192,11 @@ class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin,
pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
- let Inst{24} = P;
- let Inst{23} = ?; // The U bit.
- let Inst{22} = 1;
- let Inst{21} = W;
- let Inst{20} = load;
+ let Inst{24} = P;
+ let Inst{23} = ?; // The U bit.
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = load;
}
class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1225,14 +1232,14 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
list<Predicate> Predicates = [IsThumb2];
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
- let Inst{24} = signed;
- let Inst{23} = 0;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
let Inst{22-21} = opcod;
- let Inst{20} = load;
- let Inst{11} = 1;
+ let Inst{20} = load;
+ let Inst{11} = 1;
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
- let Inst{10} = pre; // The P bit.
- let Inst{8} = 1; // The W bit.
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
}
// Helper class for disassembly only
@@ -1243,9 +1250,9 @@ class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
: T2I<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b011;
- let Inst{23} = long;
+ let Inst{23} = long;
let Inst{22-20} = op22_20;
- let Inst{7-4} = op7_4;
+ let Inst{7-4} = op7_4;
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
@@ -1325,9 +1332,9 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
}
// Load / store multiple
-class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1337,9 +1344,9 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let D = VFPNeonDomain;
}
-class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1367,8 +1374,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Double precision, binary, VML[AS] (for additional predicate)
@@ -1379,12 +1386,11 @@ class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
list<Predicate> Predicates = [HasVFP2, UseVMLx];
}
-
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1415,8 +1421,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1010;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Single precision binary, if no NEON
@@ -1521,10 +1527,18 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
: NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
cstr, pattern> {
let Inst{31-24} = 0b11110100;
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{7-4} = op7_4;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
}
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1548,13 +1562,13 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
string opc, string dt, string asm, string cstr,
list<dag> pattern>
: NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-19} = op21_19;
- let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{5} = op5;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
}
// NEON 2 vector register format.
@@ -1567,9 +1581,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N2V except it doesn't have a datatype suffix.
@@ -1582,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 2 vector register with immediate.
@@ -1592,12 +1606,12 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 3 vector register format.
@@ -1605,12 +1619,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N3V except it doesn't have a data type suffix.
@@ -1619,12 +1633,12 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON VMOVs between scalar and core registers.
@@ -1634,9 +1648,9 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
: InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
"", itin> {
let Inst{27-20} = opcod1;
- let Inst{11-8} = opcod2;
- let Inst{6-5} = opcod3;
- let Inst{4} = 1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
@@ -1670,9 +1684,9 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
let Inst{24-23} = 0b11;
let Inst{21-20} = 0b11;
let Inst{19-16} = op19_16;
- let Inst{11-7} = 0b11000;
- let Inst{6} = op6;
- let Inst{4} = 0;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
}
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 51fc1522485f..e66f9b9ad0ac 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -44,6 +44,10 @@ def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
SDTCisVT<3, i32>, SDTCisVT<4, i32>,
SDTCisVT<5, OtherVT>]>;
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -54,13 +58,16 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -99,11 +106,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
[SDNPHasChain]>;
+def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd,
+ [SDNPOutFlag]>;
+
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
- [SDNPOutFlag,SDNPCommutative]>;
+ [SDNPOutFlag, SDNPCommutative]>;
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
@@ -117,51 +127,54 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
-def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
+def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
-def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
-def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
-def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
-
-def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -221,29 +234,12 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// e.g., 0xf000ffff
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
- uint32_t v = (uint32_t)N->getZExtValue();
- if (v == 0xffffffff)
- return 0;
- // there can be 1's on either or both "outsides", all the "inside"
- // bits must be 0's
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
let PrintMethod = "printBitfieldInvMaskImmOperand";
}
/// Split a 32-bit immediate into two 16 bit parts.
-def lo16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
- MVT::i32);
-}]>;
-
def hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
}]>;
@@ -306,6 +302,13 @@ def pclabel : Operand<i32> {
let PrintMethod = "printPCLabel";
}
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+}
+
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
@@ -319,10 +322,7 @@ def so_reg : Operand<i32>, // reg reg imm
// represented in the imm field in the same 12-bit form that they are encoded
// into so_imm instructions: the 8-bit immediate is the least significant bits
// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
-def so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
- }]> {
+def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
let PrintMethod = "printSOImmOperand";
}
@@ -452,11 +452,15 @@ include "ARMInstrFormats.td"
/// binop that produces a value.
multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{25} = 1;
}
+ }
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
@@ -502,7 +506,7 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// a explicit result, only implicitly set CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
@@ -1117,7 +1121,7 @@ let isBranch = 1, isTerminator = 1 in {
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target \n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
let Inst{11-4} = 0b00000000;
let Inst{15-12} = 0b1111;
@@ -1127,7 +1131,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTm : JTI<(outs),
(ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "ldr\tpc, $target \n$jt",
+ IIC_Br, "ldr\tpc, $target$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1139,7 +1143,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTadd : JTI<(outs),
(ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "add\tpc, $target, $idx \n$jt",
+ IIC_Br, "add\tpc, $target, $idx$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1573,8 +1577,12 @@ defm UXTH : AI_unary_rrot<0b01101111,
defm UXTB16 : AI_unary_rrot<0b01101100,
"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (UXTB16r_rot GPR:$Src, 24)>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 24)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16r_rot GPR:$Src, 8)>;
@@ -1631,16 +1639,24 @@ defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
+ IIC_iALUi, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ IIC_iALUr, "rsb", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
+
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
+ IIC_iALUsr, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
let Inst{25} = 0;
}
@@ -1667,6 +1683,14 @@ def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
Requires<[IsARM]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
@@ -1716,24 +1740,26 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
// ARM Arithmetic Instruction -- for disassembly only
// GPR:$dst = GPR:$a op GPR:$b
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc>
+class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */]>
: AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
- opc, "\t$dst, $a, $b",
- [/* For disassembly only; pattern left blank */]> {
+ opc, "\t$dst, $a, $b", pattern> {
let Inst{27-20} = op27_20;
let Inst{7-4} = op7_4;
}
// Saturating add/subtract -- for disassembly only
-def QADD : AAI<0b00010000, 0b0101, "qadd">;
+def QADD : AAI<0b00010000, 0b0101, "qadd",
+ [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
def QASX : AAI<0b01100010, 0b0011, "qasx">;
def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
def QSAX : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB : AAI<0b00010010, 0b0101, "qsub">;
+def QSUB : AAI<0b00010010, 0b0101, "qsub",
+ [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
@@ -1793,54 +1819,45 @@ def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
// Signed/Unsigned saturate -- for disassembly only
-def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b001;
-}
-
-def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101010;
let Inst{7-4} = 0b0011;
}
-def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b001;
-}
-
-def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101110;
let Inst{7-4} = 0b0011;
}
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
defm AND : AsI1_bin_irs<0b0000, "and",
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm ANDS : AI1_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
@@ -1858,11 +1875,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-// Added for disassembler with the pattern field purposely left blank.
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$dst, $src, $imm", "",
- [/* For disassembly only; pattern left blank */]>,
+ "bfi", "\t$dst, $val, $imm", "$src = $dst",
+ [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111110;
let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
@@ -2232,11 +2249,20 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
let Inst{19-16} = 0b1111;
}
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() < 32);
+}], lsl_shift_imm>;
+
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+ (and (shl GPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b001;
@@ -2245,26 +2271,37 @@ def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
// Alternate cases for PKHBT where identities eliminate some nodes.
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
(PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)),
+ (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+def asr_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() <= 32);
+}], asr_shift_imm>;
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>, Requires<[IsARM, HasV6]> {
+ (and (sra GPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b101;
}
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -2272,8 +2309,52 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
defm CMP : AI1_cmp_irs<0b1010, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-//FIXME: Disable CMN, as CCodes are backwards from compare expectations
-// Compare-to-zero still works out, just not the relationals
+
+// FIXME: There seems to be a (potential) hardware bug with the CMN instruction
+// and comparison with 0. These two pieces of code should give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short, the
+// CMP instruction doesn't perform a truncate of the (logical) NOT of 0 plus the
+// value of r0 and the carry bit (because the "carry bit" parameter to
+// AddWithCarry is defined as 1 in this case, the carry flag will always be set
+// when r0 >= 0). The CMN instruction doesn't perform a NOT of 0 so there is
+// never a "carry" when this AddWithCarry is performed (because the "carry bit"
+// parameter to AddWithCarry is defined as 0).
+//
+// The AddWithCarry in the CMP case seems to be relying upon the identity:
+//
+// ~x + 1 = -x
+//
+// However when x is 0 and unsigned, this doesn't hold:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable *all* versions of CMN, especially when comparing
+// against zero, until we can limit when the CMN instruction is used (when we
+// know that the RHS is not 0) or when we have a hardware fix for this.
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
//defm CMN : AI1_cmp_irs<0b1011, "cmn",
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
@@ -2298,8 +2379,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
Defs = [CPSR] in {
def BCCi64 : PseudoInst<(outs),
- (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
- IIC_Br,
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
"${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
@@ -2346,102 +2427,63 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def Int_MemBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
// See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_SyncBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
// See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 5",
- [(ARMMemBarrierV6 GPR:$zero)]>,
+ [(ARMMemBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DMB
// FIXME: add encoding
}
-def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 4",
- [(ARMSyncBarrierV6 GPR:$zero)]>,
+ [(ARMSyncBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DSB
// FIXME: add encoding
}
}
-// Helper class for multiclass MemB -- for disassembly only
-class AMBI<string opc, string asm>
- : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV7]> {
- let Inst{31-20} = 0xf57;
-}
-
-multiclass MemB<bits<4> op7_4, string opc> {
-
- def st : AMBI<opc, "\tst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1110;
- }
-
- def ish : AMBI<opc, "\tish"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1011;
- }
-
- def ishst : AMBI<opc, "\tishst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1010;
- }
-
- def nsh : AMBI<opc, "\tnsh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0111;
- }
-
- def nshst : AMBI<opc, "\tnshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0110;
- }
+// Memory Barrier Operations Variants -- for disassembly only
- def osh : AMBI<opc, "\tosh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0011;
- }
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+}
- def oshst : AMBI<opc, "\toshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0010;
- }
+class AMBI<bits<4> op7_4, string opc>
+ : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-8} = 0xf57ff0;
+ let Inst{7-4} = op7_4;
}
// These DMB variants are for disassembly only.
-defm DMB : MemB<0b0101, "dmb">;
+def DMBvar : AMBI<0b0101, "dmb">;
// These DSB variants are for disassembly only.
-defm DSB : MemB<0b0100, "dsb">;
+def DSBvar : AMBI<0b0100, "dsb">;
// ISB has only full system option -- for disassembly only
-def ISBsy : AMBI<"isb", ""> {
- let Inst{7-4} = 0b0110;
+def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = 0b1111;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7f7eb980abe8..4d2f1169061f 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -93,6 +93,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
@@ -100,14 +105,14 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 8 && EltVal == 0xff);
}]>;
@@ -124,15 +129,16 @@ def nModImm : Operand<i32> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand
// instead of a pair of D registers.
def VLDMQ
- : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
+ : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
+ "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "",
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
// This is equivalent to VLD1q64 except that it has a Q register operand.
@@ -141,15 +147,16 @@ def VLD1q
IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
} // mayLoad = 1, neverHasSideEffects = 1
-let mayStore = 1, neverHasSideEffects = 1 in {
// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand
// instead of a pair of D registers.
def VSTMQ
- : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
+ "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "",
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
// This is equivalent to VST1q64 except that it has a Q register operand.
@@ -160,6 +167,25 @@ def VST1q
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQWBPseudo
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQPseudo
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQQWBPseudo
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQQQWBPseudo
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb, $src = $dst">;
+
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
@@ -180,6 +206,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">;
def VLD1q32 : VLD1Q<0b1000, "32">;
def VLD1q64 : VLD1Q<0b1100, "64">;
+def VLD1q8Pseudo : VLDQPseudo;
+def VLD1q16Pseudo : VLDQPseudo;
+def VLD1q32Pseudo : VLDQPseudo;
+def VLD1q64Pseudo : VLDQPseudo;
+
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
@@ -202,6 +233,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
+def VLD1q8Pseudo_UPD : VLDQWBPseudo;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@@ -222,6 +258,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+def VLD1d64TPseudo : VLDQQPseudo;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
@@ -244,6 +283,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
+def VLD1d64QPseudo : VLDQQPseudo;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
+
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
@@ -263,6 +305,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
def VLD2q32 : VLD2Q<0b1000, "32">;
+def VLD2d8Pseudo : VLDQPseudo;
+def VLD2d16Pseudo : VLDQPseudo;
+def VLD2d32Pseudo : VLDQPseudo;
+
+def VLD2q8Pseudo : VLDQQPseudo;
+def VLD2q16Pseudo : VLDQQPseudo;
+def VLD2q32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
@@ -284,6 +334,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2d8Pseudo_UPD : VLDQWBPseudo;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
@@ -302,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo : VLDQQPseudo;
+def VLD3d16Pseudo : VLDQQPseudo;
+def VLD3d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -314,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
@@ -322,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -338,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo : VLDQQPseudo;
+def VLD4d16Pseudo : VLDQQPseudo;
+def VLD4d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -350,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
@@ -358,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
@@ -486,6 +568,25 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
+class VSTQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
+class VSTQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
@@ -505,6 +606,11 @@ def VST1q16 : VST1Q<0b0100, "16">;
def VST1q32 : VST1Q<0b1000, "32">;
def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8Pseudo : VSTQPseudo;
+def VST1q16Pseudo : VSTQPseudo;
+def VST1q32Pseudo : VSTQPseudo;
+def VST1q64Pseudo : VSTQPseudo;
+
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
@@ -525,6 +631,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8Pseudo_UPD : VSTQWBPseudo;
+def VST1q16Pseudo_UPD : VSTQWBPseudo;
+def VST1q32Pseudo_UPD : VSTQWBPseudo;
+def VST1q64Pseudo_UPD : VSTQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
@@ -547,6 +658,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+def VST1d64TPseudo : VSTQQPseudo;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
@@ -570,6 +684,9 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
+def VST1d64QPseudo : VSTQQPseudo;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo;
+
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
@@ -589,6 +706,14 @@ def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
def VST2q32 : VST2Q<0b1000, "32">;
+def VST2d8Pseudo : VSTQPseudo;
+def VST2d16Pseudo : VSTQPseudo;
+def VST2d32Pseudo : VSTQPseudo;
+
+def VST2q8Pseudo : VSTQQPseudo;
+def VST2q16Pseudo : VSTQQPseudo;
+def VST2q32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -610,6 +735,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2d8Pseudo_UPD : VSTQWBPseudo;
+def VST2d16Pseudo_UPD : VSTQWBPseudo;
+def VST2d32Pseudo_UPD : VSTQWBPseudo;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
def VST2b16 : VST2D<0b1001, 0b0100, "16">;
@@ -628,6 +761,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">;
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
+def VST3d8Pseudo : VSTQQPseudo;
+def VST3d16Pseudo : VSTQQPseudo;
+def VST3d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -640,6 +777,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
def VST3q16 : VST3D<0b0101, 0b0100, "16">;
@@ -648,10 +789,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">;
-def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
-def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -664,6 +809,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">;
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
+def VST4d8Pseudo : VSTQQPseudo;
+def VST4d16Pseudo : VSTQQPseudo;
+def VST4d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -676,6 +825,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
@@ -684,10 +837,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
@@ -879,6 +1036,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
+ [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -888,14 +1054,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
-// Long 2-register intrinsics (currently only used for VMOVL).
-class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
(ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
@@ -1150,6 +1316,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(ResTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))))]>;
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set DPR:$dst, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>;
+
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1169,6 +1353,53 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_8:$src3),
+ imm:$lane))))))]>;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))))]>;
+
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1217,6 +1448,61 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
let isCommutable = Commutable;
}
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src2),imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+ let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
+ (TyD DPR:$src2))))))]> {
+ let isCommutable = Commutable;
+}
+
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -1248,14 +1534,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
imm:$lane)))))]>;
-// Wide 3-register intrinsics.
-class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp, bit Commutable>
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
let isCommutable = Commutable;
}
@@ -1488,6 +1775,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -1508,14 +1812,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
// source operand element sizes of 16, 32 and 64 bits:
-multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
- def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
- def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
- def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
@@ -1607,6 +1911,47 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1643,21 +1988,36 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v8i16, v8i8, IntOp, Commutable>;
}
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
// Neon Wide 3-register vector intrinsics,
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
- def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "8"),
- v8i16, v8i8, IntOp, Commutable>;
- def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "16"),
- v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "32"),
- v2i64, v2i32, IntOp, Commutable>;
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
@@ -1700,6 +2060,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
mul, ShOp>;
}
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1723,6 +2106,29 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+ !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+
// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1752,6 +2158,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
@@ -1996,13 +2417,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
-defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
-defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -2113,16 +2534,14 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", int_arm_neon_vmullu, 1>;
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
- int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
- int_arm_neon_vmullu>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
@@ -2172,13 +2591,13 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2224,13 +2643,13 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2247,13 +2666,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
-defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -2469,32 +2888,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "s", int_arm_neon_vabds, 0>;
+ "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "u", int_arm_neon_vabdu, 0>;
+ "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
- "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
- "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "u", int_arm_neon_vabau>;
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "u", int_arm_neon_vabalu>;
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.
@@ -3113,8 +3532,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
- "vmovn", "i", int_arm_neon_vmovn>;
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+ "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
"vqmovn", "s", int_arm_neon_vqmovns>;
@@ -3123,10 +3542,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
"vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
- int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
- int_arm_neon_vmovlu>;
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// Vector Conversions.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index bc0790dccbb5..a13ff1232749 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -221,9 +221,13 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
// ADD rd, sp, #imm8
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in {
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
"add\t$dst, $sp, $rhs", []>,
T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8
+}
// ADD sp, sp, #imm7
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
@@ -251,19 +255,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
let Inst{2-0} = 0b101;
}
-// Pseudo instruction that will expand into a tSUBspi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
-
-def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "${:comment} add\t$dst, $rhs", []>;
-
-let Defs = [CPSR] in
-def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "${:comment} and\t$dst, $rhs", []>;
-} // usesCustomInserter
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
@@ -378,7 +369,7 @@ let isBranch = 1, isTerminator = 1 in {
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt",
+ IIC_Br, "mov\tpc, $target\n\t.align\t2$jt",
[(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
Encoding16 {
let Inst{15-7} = 0b010001101;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bbe675e81ab1..6ba0a44be470 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -32,7 +32,7 @@ def t2_so_reg : Operand<i32>, // reg imm
ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
[shl,srl,sra,rotr]> {
let PrintMethod = "printT2SOOperand";
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops rGPR, i32imm);
}
// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
@@ -51,10 +51,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// represented in the imm field in the same 12-bit form that they are encoded
// into t2_so_imm instructions: the 8-bit immediate is the least significant
// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
-def t2_so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
-}]>;
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>;
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
@@ -162,7 +159,7 @@ def t2am_imm8s4_offset : Operand<i32> {
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
- let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
}
@@ -176,9 +173,9 @@ def t2addrmode_so_reg : Operand<i32>,
multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Cheap = 0, bit ReMat = 0> {
// shifted imm
- def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+ def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_imm:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$src))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
let Inst{31-27} = 0b11110;
@@ -189,9 +186,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -202,9 +199,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -217,11 +214,11 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0, string wide =""> {
+ bit Commutable = 0, string wide = ""> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -229,9 +226,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -242,9 +239,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -259,23 +256,35 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_bin_irs counterpart.
-multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
opc, ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
+ // register
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
+ opc, "\t$dst, $rhs, $lhs",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = ?; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
opc, "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -289,9 +298,9 @@ let Defs = [CPSR] in {
multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -299,9 +308,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -312,9 +321,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -328,9 +337,12 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -338,10 +350,11 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
}
+ }
// 12-bit imm
- def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
+ def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
!strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24} = 0;
@@ -350,9 +363,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -364,9 +377,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -382,9 +395,9 @@ let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -393,9 +406,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -407,9 +420,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -423,9 +436,9 @@ let Defs = [CPSR] in {
multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -434,9 +447,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -448,9 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -461,13 +474,14 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
+/// version is not needed since this is only for codegen.
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -475,9 +489,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{15} = 0;
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
!strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -490,18 +504,18 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
// 5-bit imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-21} = opcod;
@@ -513,7 +527,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// a explicit result, only implicitly set CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
@@ -527,9 +541,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{11-8} = 0b1111; // Rd
}
// register
- def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, GPR:$rhs)]> {
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -639,9 +653,9 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -650,9 +664,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -665,9 +679,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// UXTB16 - Requres T2ExtractPack, does not need the .w qualifier.
multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
+ [(set rGPR:$dst, (opnode rGPR:$src))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -677,9 +691,9 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -694,7 +708,7 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
// SXTB16 - Requres T2ExtractPack, does not need the .w qualifier, no pattern
// supported yet.
multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -704,7 +718,7 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -719,9 +733,9 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -730,10 +744,10 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
- [(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS,
+ (rotr rGPR:$RHS, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -747,7 +761,7 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// DO variant - disassembly only, no pattern
multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -756,7 +770,7 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -779,8 +793,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
// assembler.
let neverHasSideEffects = 1 in {
let isReMaterializable = 1 in
-def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #$label", []> {
+def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
+ "adr${p}.w\t$dst, #$label", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -790,9 +804,9 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
let Inst{15} = 0;
}
} // neverHasSideEffects
-def t2LEApcrelJT : T2XI<(outs GPR:$dst),
+def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #${label}_${id}", []> {
+ "adr${p}.w\t$dst, #${label}_${id}", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -866,9 +880,9 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
}
// Signed and unsigned division on v7-M
-def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"sdiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
@@ -877,9 +891,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"udiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
@@ -888,17 +902,6 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
-def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
-def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
-} // usesCustomInserter
-
-
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
@@ -917,10 +920,10 @@ defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
-def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
-def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins i32imm:$addr), IIC_iLoadi,
"ldrd", "\t$dst1, $addr", []> {
let Inst{19-16} = 0b1111; // Rn
@@ -967,6 +970,11 @@ def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
(t2LDRHpci tconstpool:$addr)>;
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
// Indexed loads
let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
@@ -1286,9 +1294,9 @@ def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
-def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
"mov", ".w\t$dst, $src",
- [(set GPR:$dst, t2_so_imm:$src)]> {
+ [(set rGPR:$dst, t2_so_imm:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
@@ -1298,9 +1306,9 @@ def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, $src",
- [(set GPR:$dst, imm0_65535:$src)]> {
+ [(set rGPR:$dst, imm0_65535:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -1309,10 +1317,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
}
let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
+def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi,
"movt", "\t$dst, $imm",
- [(set GPR:$dst,
- (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ [(set rGPR:$dst,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
@@ -1320,7 +1328,7 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
-def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>;
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
//===----------------------------------------------------------------------===//
// Extend Instructions.
@@ -1352,10 +1360,14 @@ defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
-def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -1389,7 +1401,7 @@ defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
-defm t2RSB : T2I_rbin_is <0b1110, "rsb",
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
@@ -1409,18 +1421,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
let AddedComplexity = 1 in
-def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm),
- (t2SUBSri GPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm),
- (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
let AddedComplexity = 1 in
-def : T2Pat<(adde GPR:$src, imm0_255_not:$imm),
- (t2SBCSri GPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm),
- (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
+ (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
// Select Bytes -- for disassembly only
@@ -1437,9 +1449,10 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
// And Miscellaneous operations -- for disassembly only
-class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
- : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc,
- "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> {
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */]>
+ : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc,
+ "\t$dst, $a, $b", pat> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -1449,14 +1462,16 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
// Saturating add/subtract -- for disassembly only
-def t2QADD : T2I_pam<0b000, 0b1000, "qadd">;
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>;
def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">;
def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">;
def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
-def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>;
def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
@@ -1498,37 +1513,27 @@ def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b),
NoItinerary, "usad8", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8",
+def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8",
"\t$dst, $a, $b, $acc", []>;
// Signed/Unsigned saturate -- for disassembly only
-def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1540,27 +1545,16 @@ def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
-def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1572,6 +1566,9 @@ def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
//
@@ -1582,9 +1579,9 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
let Uses = [CPSR] in {
-def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"rrx", "\t$dst, $src",
- [(set GPR:$dst, (ARMrrx GPR:$src))]> {
+ [(set rGPR:$dst, (ARMrrx rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1596,9 +1593,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
}
let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"lsrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1609,9 +1606,9 @@ def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"asrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1638,10 +1635,13 @@ defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm t2ANDS : T2I_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
+
let Constraints = "$src = $dst" in
-def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm),
IIC_iUNAsi, "bfc", "\t$dst, $imm",
- [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> {
+ [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
@@ -1649,7 +1649,7 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{15} = 0;
}
-def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1657,7 +1657,7 @@ def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
let Inst{15} = 0;
}
-def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
// A8.6.18 BFI - Bitfield insert (Encoding T1)
-// Added for disassembler with the pattern field purposely left blank.
-// FIXME: Utilize this instruction in codgen.
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> {
+let Constraints = "$src = $dst" in
+def t2BFI : T2I<(outs rGPR:$dst),
+ (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
+ IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+ [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
+ bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
@@ -1677,19 +1679,20 @@ def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS,
- (not node:$RHS))>>;
+ (not node:$RHS))>, 0, "">;
// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
-def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm),
- (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
-def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm),
- (t2ORNri GPR:$src, t2_so_imm_not:$imm)>,
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
Requires<[IsThumb2]>;
def : T2Pat<(t2_so_imm_not:$src),
@@ -1699,9 +1702,9 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"mul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1709,9 +1712,9 @@ def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1719,9 +1722,9 @@ def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1732,7 +1735,8 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"smull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1740,7 +1744,8 @@ def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
let Inst{7-4} = 0b0000;
}
-def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"umull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1750,7 +1755,8 @@ def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"smlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1758,7 +1764,8 @@ def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1766,7 +1773,8 @@ def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umaal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1778,9 +1786,9 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
// Rounding variants of the below included for disassembly only
// Most significant word multiply
-def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1788,7 +1796,7 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmulr", "\t$dst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1797,9 +1805,9 @@ def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1807,7 +1815,7 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlar", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1816,9 +1824,9 @@ def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -1826,7 +1834,7 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlsr", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1836,10 +1844,10 @@ def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
}
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1848,10 +1856,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1860,10 +1868,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1872,10 +1880,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1884,10 +1892,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1896,10 +1904,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1911,11 +1919,11 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc,
- (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc,
+ (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1924,10 +1932,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1936,10 +1944,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1948,10 +1956,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1960,10 +1968,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1972,10 +1980,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1989,61 +1997,61 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
-def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
// These are for disassembly only.
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad",
+def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx",
+def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd",
+def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx",
+def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald",
+def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx",
+def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld",
+def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx",
+def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx",
"\t$ldst, $hdst, $a, $b", []>;
//===----------------------------------------------------------------------===//
@@ -2061,35 +2069,35 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
let Inst{5-4} = op2;
}
-def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", "\t$dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>;
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>;
-def t2RBIT : T2I_misc<0b01, 0b10, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rbit", "\t$dst, $src",
- [(set GPR:$dst, (ARMrbit GPR:$src))]>;
+ [(set rGPR:$dst, (ARMrbit rGPR:$src))]>;
-def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>;
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
-def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rev16", ".w\t$dst, $src",
- [(set GPR:$dst,
- (or (and (srl GPR:$src, (i32 8)), 0xFF),
- (or (and (shl GPR:$src, (i32 8)), 0xFF00),
- (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
- (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>;
+ [(set rGPR:$dst,
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF),
+ (or (and (shl rGPR:$src, (i32 8)), 0xFF00),
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF0000),
+ (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
-def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"revsh", ".w\t$dst, $src",
- [(set GPR:$dst,
+ [(set rGPR:$dst,
(sext_inreg
- (or (srl (and GPR:$src, 0xFF00), (i32 8)),
- (shl GPR:$src, (i32 8))), i16))]>;
+ (or (srl (and rGPR:$src, 0xFF00), (i32 8)),
+ (shl rGPR:$src, (i32 8))), i16))]>;
-def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF),
+ (and (shl rGPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
@@ -2100,18 +2108,20 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
}
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (t2PKHBT GPR:$src1, GPR:$src2, 0)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>,
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000),
+ (and (sra rGPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2122,18 +2132,17 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (t2PKHTB GPR:$src1, GPR:$src2, 16)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
Requires<[HasT2ExtractPack]>;
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
-
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
defm t2CMPz : T2I_cmp_irs<0b1101, "cmp",
@@ -2157,18 +2166,13 @@ defm t2TST : T2I_cmp_irs<0b0000, "tst",
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
-// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero.
-// Short range conditional branch. Looks awesome for loops. Need to figure
-// out how to use this one.
-
-
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
+def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2179,9 +2183,9 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
let Inst{7-4} = 0b0000;
}
-def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true),
+def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
IIC_iCMOVi, "mov", ".w\t$dst, $true",
-[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -2201,20 +2205,20 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod; // Shift type.
}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
} // neverHasSideEffects
@@ -2225,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F5;
// FIXME: add support for options other than a full system DMB
let Inst{3-0} = 0b1111;
}
-def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F4;
// FIXME: add support for options other than a full system DSB
let Inst{3-0} = 0b1111;
@@ -2329,13 +2327,13 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]",
"", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]",
"", []>;
-def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary,
"ldrex", "\t$dest, [$ptr]", "",
[]> {
@@ -2344,20 +2342,20 @@ def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
let Inst{11-8} = 0b1111;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"ldrexd", "\t$dest, $dest2, [$ptr]", "",
[], {?, ?, ?, ?}>;
}
let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexb", "\t$success, $src, [$ptr]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexh", "\t$success, $src, [$ptr]", "", []>;
-def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strex", "\t$success, $src, [$ptr]", "",
[]> {
@@ -2365,8 +2363,8 @@ def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
let Inst{26-20} = 0b0000100;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2STREXD : T2I_strex<0b11, (outs GPR:$success),
- (ins GPR:$src, GPR:$src2, GPR:$ptr),
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$success),
+ (ins rGPR:$src, rGPR:$src2, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexd", "\t$success, $src, $src2, [$ptr]", "", [],
{?, ?, ?, ?}>;
@@ -2416,7 +2414,7 @@ let Defs =
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
D31 ], hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2425,14 +2423,14 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2441,7 +2439,7 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
}
@@ -2482,7 +2480,7 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT :
T2JTI<(outs),
(ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0100100;
@@ -2496,7 +2494,7 @@ def t2BR_JT :
def t2TBB :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbb\t$index\n$jt", []> {
+ IIC_Br, "tbb\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2507,7 +2505,7 @@ def t2TBB :
def t2TBH :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbh\t$index\n$jt", []> {
+ IIC_Br, "tbh\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2560,7 +2558,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2647,25 +2645,25 @@ def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
}
// Return From Exception is a system instruction -- for disassembly only
-def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!",
+def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000011; // W = 1
}
-def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base",
+def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeab", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000001; // W = 0
}
-def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!",
+def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011011; // W = 1
}
-def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
+def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011001; // W = 0
@@ -2676,26 +2674,26 @@ def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
//
// Two piece so_imms.
-def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS),
- (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS),
- (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS),
+ (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
(t2_so_neg_imm2part_2 imm:$RHS))>;
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable. Remove
// when we can do generalized remat.
let isReMaterializable = 1 in
-def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set GPR:$dst, (i32 imm:$src))]>;
+ [(set rGPR:$dst, (i32 imm:$src))]>;
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -2723,7 +2721,7 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
//
// Rd = Instr{11-8}
-def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
+def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2734,7 +2732,7 @@ def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
}
// Rd = Instr{11-8}
-def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
+def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2745,7 +2743,7 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
}
// Rn = Inst{19-16}
-def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tcpsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -2757,7 +2755,7 @@ def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
}
// Rn = Inst{19-16}
-def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tspsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 84c23e1a784c..c29e09606bd4 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
//
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
-def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
-def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
@@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
// For disassembly only.
-
+let Uses = [FPSCR] in {
def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
+}
// Convert between floating-point and fixed-point
// Data type for fixed-point naming convention:
@@ -460,6 +461,7 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
+let isCodeGenOnly = 1 in {
def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
@@ -499,9 +501,11 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
// Fixed-Point to FP:
+let isCodeGenOnly = 1 in {
def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
@@ -541,6 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
} // End of 'let Constraints = "$src = $dst" in'
@@ -654,32 +659,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
}
// FPSCR <-> GPR (for disassembly only)
-
-let neverHasSideEffects = 1 in {
-let Uses = [FPSCR] in {
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
- "\t$dst, fpscr",
- [/* For disassembly only; pattern left blank */]> {
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
+ "vmrs", "\t$dst, fpscr",
+ [(set GPR:$dst, (int_arm_get_fpscr))]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-let Defs = [FPSCR] in {
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
- "\tfpscr, $src",
- [/* For disassembly only; pattern left blank */]> {
+let Defs = [FPSCR] in
+def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
+ "vmsr", "\tfpscr, $src",
+ [(int_arm_set_fpscr GPR:$src)]> {
let Inst{27-20} = 0b11101110;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-} // neverHasSideEffects
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index f80e316d23e8..2b7645a42119 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -57,7 +57,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
namespace {
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -193,20 +193,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
- if (isAM4 && Offset == 4) {
- if (isThumb2)
- // Thumb2 does not support ldmib / stmib.
- return false;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
Mode = ARM_AM::ib;
- } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
- if (isThumb2)
- // Thumb2 does not support ldmda / stmda.
- return false;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
Mode = ARM_AM::da;
- } else if (isAM4 && Offset == -4 * (int)NumRegs) {
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ else if (Offset != 0) {
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
@@ -246,18 +243,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
BaseKill = true; // New base is always killed right its use.
}
- bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
Opcode = getLoadStoreMultipleOpcode(Opcode);
- MachineInstrBuilder MIB = (isAM4)
- ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
- : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
- .addImm(Pred).addReg(PredReg);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
@@ -333,6 +324,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
if (KilledRegs.count(Reg)) {
unsigned j = Killer[Reg];
memOps[j].MBBI->getOperand(0).setIsKill(false);
+ memOps[j].isKill = false;
}
}
MBB.erase(memOps[i].MBBI);
@@ -348,7 +340,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned insertAfter = SIndex;
@@ -366,12 +358,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX
: ARMRegisterInfo::getRegisterNumbering(Reg);
- // AM4 - register numbers in ascending order.
- // AM5 - consecutive register numbers in ascending order.
- // Can only do up to 16 double-word registers per insn.
+ // Register numbers must be in ascending order. For VFP, the registers
+ // must also be consecutive and there is a limit of 16 double-word
+ // registers per instruction.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum)
+ ((isNotVFP && RegNum > PRegNum)
|| ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
@@ -409,7 +401,7 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
return false;
// Make sure the offset fits in 8 bits.
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
@@ -433,7 +425,7 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
MI->getOpcode() != ARM::ADDri)
return false;
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
@@ -464,12 +456,12 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
- return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMS:
case ARM::VSTMS:
+ return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMD:
case ARM::VSTMD:
- return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ return (MI->getNumOperands() - 4) * 8;
}
}
@@ -512,26 +504,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
- Opcode == ARM::STM || Opcode == ARM::t2STM);
bool DoMerge = false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- unsigned Offset = 0;
- if (isAM4) {
- // Can't use an updating ld/st if the base register is also a dest
- // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).getReg() == Base)
- return false;
- }
- Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
- } else {
- // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
- Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
- Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
}
+ Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -539,22 +522,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
- if (isAM4) {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::db;
- } else if (isAM4 && Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::da;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- }
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
}
if (DoMerge)
MBB.erase(PrevMBBI);
@@ -566,19 +541,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
- if (isAM4) {
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
}
if (DoMerge) {
if (NextMBBI == I) {
@@ -595,16 +563,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
- .addReg(Base, getKillRegState(BaseKill));
- if (isAM4) {
- // [t2]LDM_UPD, [t2]STM_UPD
- MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
- .addImm(Pred).addReg(PredReg);
- } else {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
- MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
- .addImm(Pred).addReg(PredReg);
- }
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode))
+ .addImm(Pred).addReg(PredReg);
// Transfer the rest of operands.
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
@@ -736,11 +697,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
- Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
- (isDPR ? 2 : 1));
+ Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
+ ARM_AM::db : ARM_AM::ia);
else if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
else
@@ -748,6 +708,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM5) {
// VLDM[SD}_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
MachineOperand &MO = MI->getOperand(0);
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
@@ -1268,7 +1231,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
namespace {
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetData *TD;
const TargetInstrInfo *TII;
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index ab2b06b60783..ab2b06b60783 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h
index b81a30690ce2..b81a30690ce2 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
+++ b/lib/Target/ARM/ARMMCInstLower.h
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 7e57a1ca5576..514c26b4daf0 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
/// LRSpilledForFarJump - True if the LR register has been for spilled to
/// enable far jump.
bool LRSpilledForFarJump;
@@ -95,7 +99,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -106,7 +110,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -125,6 +129,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index d020f3c74bde..305b232e6a99 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -220,41 +220,11 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // FP is R11, R9 is available.
- static const unsigned ARM_GPR_AO_1[] = {
+ static const unsigned ARM_GPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10,
- ARM::R11 };
- // FP is R11, R9 is not available.
- static const unsigned ARM_GPR_AO_2[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R10,
- ARM::R11 };
- // FP is R7, R9 is available as non-callee-saved register.
- // This is used by Darwin.
- static const unsigned ARM_GPR_AO_3[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R9, ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,ARM::R7 };
- // FP is R7, R9 is not available.
- static const unsigned ARM_GPR_AO_4[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,
- ARM::R7 };
- // FP is R7, R9 is available as callee-saved register.
- // This is used by non-Darwin platform in Thumb mode.
- static const unsigned ARM_GPR_AO_5[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 };
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
// For Thumb1 mode, we don't want to allocate hi regs at all, as we
// don't know how to spill them. If we make our prologue/epilogue code
@@ -270,85 +240,71 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_4;
- else
- return ARM_GPR_AO_3;
- } else {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_2;
- else if (Subtarget.isThumb())
- return ARM_GPR_AO_5;
- else
- return ARM_GPR_AO_1;
- }
+ return ARM_GPR_AO;
}
GPRClass::iterator
GPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
- } else {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
- else if (Subtarget.isThumb())
- I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
- }
-
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+ return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
}
}];
}
-// Thumb registers are R0-R7 normally. Some instructions can still use
-// the general GPR register class above (MOV, e.g.)
-def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+// restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviours when PC
+// or SP (R13 or R15) are used. The ARM ARM refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R11, R12, LR]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- static const unsigned THUMB_tGPR_AO[] = {
+ static const unsigned ARM_rGPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_rGPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
- // FP is R7, only low registers available.
- tGPRClass::iterator
- tGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return THUMB_tGPR_AO;
+ rGPRClass::iterator
+ rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO;
+ return ARM_rGPR_AO;
}
- tGPRClass::iterator
- tGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ rGPRClass::iterator
+ rGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- tGPRClass::iterator I =
- THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
+ return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
}
}];
}
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (MOV, e.g.)
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {}
+
// For tail calls, we can't use callee-saved registers, as they are restored
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
@@ -381,36 +337,20 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO_TC;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_NOR9_TC;
- else
- return ARM_GPR_R9_TC;
- } else
- // R9 is either callee-saved or reserved; can't use it.
- return ARM_GPR_NOR9_TC;
+ return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
}
tcGPRClass::iterator
tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
- return I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- else
- I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned));
- } else
- // R9 is either callee-saved or reserved; can't use it.
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- return I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+
+ return Subtarget.isTargetDarwin() ?
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
+ ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
}
}];
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 10fd257055fb..cb539f4c01ec 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -33,14 +33,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(false)
, SlowVMLx(false)
+ , SlowFPBrcc(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
+ , NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
, UseMovt(UseMOVT)
, HasFP16(false)
, HasHardwareDivide(false)
, HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , FPOnlySP(false)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e7d92ede9b98..67e58038ee77 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
+ V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
};
enum ARMFPEnum {
@@ -63,6 +63,9 @@ protected:
/// ThumbMode - Indicates supported Thumb version.
ThumbTypeEnum ThumbMode;
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
@@ -84,6 +87,18 @@ protected:
/// instructions.
bool HasT2ExtractPack;
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -128,6 +143,8 @@ protected:
bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+ bool hasARMOps() const { return !NoARM; }
+
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
bool hasVFP3() const { return ARMFPUType >= VFPv3; }
bool hasNEON() const { return ARMFPUType >= NEON; }
@@ -135,8 +152,11 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 09203f9304df..30ff8276cdaa 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -31,7 +31,6 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
-
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -66,6 +65,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
"v128:64:128-v64:64:64-n32")),
TLInfo(*this),
TSInfo(*this) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -85,9 +87,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
TSInfo(*this) {
}
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+ return false;
+}
-// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createARMISelDag(*this, OptLevel));
@@ -132,7 +140,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (Subtarget.isThumb2())
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
PM.add(createThumb2SizeReductionPass());
PM.add(createARMConstantIslandPass());
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a222e57b13ff..17e5425a9d37 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -50,6 +50,7 @@ public:
}
// Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4b083244b241..75e2a739bf1f 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMSubtarget.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -18,8 +19,10 @@
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -37,6 +40,7 @@ enum ShiftType {
class ARMAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
private:
MCAsmParser &getParser() const { return Parser; }
@@ -76,26 +80,33 @@ private:
bool ParseDirectiveSyntax(SMLoc L);
- // TODO - For now hacked versions of the next two are in here in this file to
- // allow some parser testing until the table gen versions are implemented.
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst) {
+ if (!MatchInstructionImpl(Operands, Inst))
+ return false;
+
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+
+ return true;
+ }
/// @name Auto-generated Match Functions
/// {
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
- /// MatchRegisterName - Match the given string to a register name and return
- /// its register number, or -1 if there is no match. To allow return values
- /// to be used directly in register lists, arm registers have values between
- /// 0 and 15.
- int MatchRegisterName(StringRef Name);
+ unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
+
+ bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>
+ &Operands,
+ MCInst &Inst);
/// }
public:
- ARMAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -110,16 +121,21 @@ private:
ARMOperand() {}
public:
enum KindTy {
- Token,
- Register,
+ CondCode,
Immediate,
- Memory
+ Memory,
+ Register,
+ Token
} Kind;
SMLoc StartLoc, EndLoc;
union {
struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
const char *Data;
unsigned Length;
} Tok;
@@ -151,16 +167,19 @@ public:
};
- ARMOperand(KindTy K, SMLoc S, SMLoc E)
- : Kind(K), StartLoc(S), EndLoc(E) {}
+ //ARMOperand(KindTy K, SMLoc S, SMLoc E)
+ // : Kind(K), StartLoc(S), EndLoc(E) {}
ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
+ case CondCode:
+ CC = o.CC;
+ break;
case Token:
- Tok = o.Tok;
+ Tok = o.Tok;
break;
case Register:
Reg = o.Reg;
@@ -179,6 +198,11 @@ public:
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -194,15 +218,50 @@ public:
return Imm.Val;
}
- bool isToken() const {return Kind == Token; }
+ bool isCondCode() const { return Kind == CondCode; }
+
+ bool isImm() const { return Kind == Immediate; }
bool isReg() const { return Kind == Register; }
+ bool isToken() const {return Kind == Token; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ // FIXME: What belongs here?
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ virtual void dump(raw_ostream &OS) const;
+
+ static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC,
+ SMLoc S) {
+ Op.reset(new ARMOperand);
+ Op->Kind = CondCode;
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ }
+
static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
SMLoc S) {
Op.reset(new ARMOperand);
@@ -262,6 +321,33 @@ public:
} // end anonymous namespace.
+void ARMOperand::dump(raw_ostream &OS) const {
+ switch (Kind) {
+ case CondCode:
+ OS << ARMCondCodeToString(getCondCode());
+ break;
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Memory:
+ OS << "<memory>";
+ break;
+ case Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
/// Try to parse a register name. The token must be an Identifier when called,
/// and if it is a register name a Reg operand is created, the token is eaten
/// and false is returned. Else true is returned and no token is eaten.
@@ -548,77 +634,6 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
return false;
}
-/// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(StringRef Name) {
- if (Name == "r0" || Name == "R0")
- return 0;
- else if (Name == "r1" || Name == "R1")
- return 1;
- else if (Name == "r2" || Name == "R2")
- return 2;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r4" || Name == "R4")
- return 4;
- else if (Name == "r5" || Name == "R5")
- return 5;
- else if (Name == "r6" || Name == "R6")
- return 6;
- else if (Name == "r7" || Name == "R7")
- return 7;
- else if (Name == "r8" || Name == "R8")
- return 8;
- else if (Name == "r9" || Name == "R9")
- return 9;
- else if (Name == "r10" || Name == "R10")
- return 10;
- else if (Name == "r11" || Name == "R11" || Name == "fp")
- return 11;
- else if (Name == "r12" || Name == "R12" || Name == "ip")
- return 12;
- else if (Name == "r13" || Name == "R13" || Name == "sp")
- return 13;
- else if (Name == "r14" || Name == "R14" || Name == "lr")
- return 14;
- else if (Name == "r15" || Name == "R15" || Name == "pc")
- return 15;
- return -1;
-}
-
-/// A hack to allow some testing, to be replaced by a real table gen version.
-bool ARMAsmParser::
-MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst) {
- ARMOperand &Op0 = *(ARMOperand*)Operands[0];
- assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
- StringRef Mnemonic = Op0.getToken();
- if (Mnemonic == "add" ||
- Mnemonic == "stmfd" ||
- Mnemonic == "str" ||
- Mnemonic == "ldmfd" ||
- Mnemonic == "ldr" ||
- Mnemonic == "mov" ||
- Mnemonic == "sub" ||
- Mnemonic == "bl" ||
- Mnemonic == "push" ||
- Mnemonic == "blx" ||
- Mnemonic == "pop") {
- // Hard-coded to a valid instruction, till we have a real matcher.
- Inst = MCInst();
- Inst.setOpcode(ARM::MOVr);
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateReg(0));
- return false;
- }
-
- return true;
-}
-
/// Parse a arm instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
@@ -661,12 +676,56 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
OwningPtr<ARMOperand> Op;
- ARMOperand::CreateToken(Op, Name, NameLoc);
-
+
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Determine the predicate, if any.
+ //
+ // FIXME: We need a way to check whether a prefix supports predication,
+ // otherwise we will end up with an ambiguity for instructions that happen to
+ // end with a predicate name.
+ unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Head = Head.slice(0, Head.size() - 2);
+ } else
+ CC = ARMCC::AL;
+
+ ARMOperand::CreateToken(Op, Head, NameLoc);
Operands.push_back(Op.take());
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
+ Operands.push_back(Op.take());
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ Head = Name.slice(Start, Next);
+ ARMOperand::CreateToken(Op, Head, NameLoc);
+ Operands.push_back(Op.take());
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
OwningPtr<ARMOperand> Op;
if (ParseOperand(Op)) return true;
@@ -809,3 +868,5 @@ extern "C" void LLVMInitializeARMAsmParser() {
RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
LLVMInitializeARMAsmLexer();
}
+
+#include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index edc934549b28..8026e7718ca9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
O << '\t' << "vpush";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
O << '\t' << "vpop";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -278,15 +278,13 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << ' ';
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -414,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -463,9 +451,9 @@ void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
assert(0 && "FIXME: Implement printAddrModePCOperand");
}
-void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
int32_t lsb = CountTrailingZeros_32(v);
@@ -474,6 +462,31 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
O << '#' << lsb << ", #" << width;
}
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "{";
@@ -669,12 +682,11 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index ddf5047793d2..e5ad0d07e9ba 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -57,6 +57,8 @@ public:
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
index 4e299f86ecb6..18645c0864a3 100644
--- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -1,8 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmPrinter
- ARMAsmPrinter.cpp
ARMInstPrinter.cpp
- ARMMCInstLower.cpp
)
add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 0df34666b959..6b4dee5965d2 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -7,25 +7,32 @@ tablegen(ARMGenInstrNames.inc -gen-instr-enums)
tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
tablegen(ARMGenCodeEmitter.inc -gen-emitter)
tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(ARMGenFastISel.inc -gen-fast-isel)
tablegen(ARMGenCallingConv.inc -gen-callingconv)
tablegen(ARMGenSubtarget.inc -gen-subtarget)
tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
add_llvm_target(ARMCodeGen
+ ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
ARMExpandPseudoInsts.cpp
+ ARMFastISel.cpp
+ ARMGlobalMerge.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCAsmInfo.cpp
+ ARMMCInstLower.cpp
ARMRegisterInfo.cpp
+ ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
@@ -38,7 +45,6 @@ add_llvm_target(ARMCodeGen
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
- ARMSelectionDAGInfo.cpp
)
-target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG)
+target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4de697e8bf67..e22028985b46 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
/// ARMDecoderEmitter.cpp TableGen backend. It contains:
///
@@ -87,6 +89,11 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
return ARM::BFI;
}
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2.
+ // As a result, the decoder fails to deocode USAT properly.
+ if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+ return ARM::USAT;
+
// Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
// As a result, the decoder fails to decode UMULL properly.
if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
@@ -106,7 +113,7 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
// Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
// As a result, the decoder fails to deocode SSAT properly.
if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
- return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
+ return ARM::SSAT;
// Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
// As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
@@ -291,7 +298,7 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
/// decodeInstruction(insn) is invoked on the original insn.
///
/// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
if (IsThumb2) {
uint16_t op1 = slice(insn, 28, 27);
uint16_t op2 = slice(insn, 26, 20);
@@ -429,7 +436,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
// passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
// halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
// the top half followed by the second halfword.
- uint32_t insn = 0;
+ unsigned insn = 0;
// Possible second halfword.
uint16_t insn1 = 0;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index a07ff2832aa7..9f493b9aee02 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
/// describing the operand info for each ARMInsts[i].
@@ -93,6 +95,9 @@ static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
RegClassID = ARM::DPRRegClassID;
}
+ // For this purpose, we can treat rGPR as if it were GPR.
+ if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+
// See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
unsigned RegNum =
RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
@@ -451,12 +456,23 @@ static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
//
// A8-11: DecodeImmShift()
static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
- // If type == 0b11 and imm5 == 0, we have an rrx, instead.
- if (ShOp == ARM_AM::ror && ShImm == 0)
- ShOp = ARM_AM::rrx;
- // If (lsr or asr) and imm5 == 0, shift amount is 32.
- if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0)
+ if (ShImm != 0)
+ return;
+ switch (ShOp) {
+ case ARM_AM::no_shift:
+ case ARM_AM::rrx:
+ break;
+ case ARM_AM::lsl:
+ ShOp = ARM_AM::no_shift;
+ break;
+ case ARM_AM::lsr:
+ case ARM_AM::asr:
ShImm = 32;
+ break;
+ case ARM_AM::ror:
+ ShOp = ARM_AM::rrx;
+ break;
+ }
}
// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
@@ -490,9 +506,6 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7)
- return true;
-
assert(0 && "Unexpected pseudo instruction!");
return false;
}
@@ -887,7 +900,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
- assert(0 && "Unexpected BrMiscFrm Opcode");
return false;
}
@@ -906,34 +918,6 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
return true;
}
-static inline bool SaturateOpcode(unsigned Opcode) {
- switch (Opcode) {
- case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16:
- case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16:
- return true;
- default:
- return false;
- }
-}
-
-static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::SSATlsl:
- case ARM::SSATasr:
- return slice(insn, 20, 16) + 1;
- case ARM::SSAT16:
- return slice(insn, 19, 16) + 1;
- case ARM::USATlsl:
- case ARM::USATasr:
- return slice(insn, 20, 16);
- case ARM::USAT16:
- return slice(insn, 19, 16);
- default:
- assert(0 && "Invalid opcode passed in");
- return 0;
- }
-}
-
// A major complication is the fact that some of the saturating add/subtract
// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
// They are QADD, QDADD, QDSUB, and QSUB.
@@ -959,40 +943,14 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpIdx >= NumOps)
return false;
- // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd.
- if (SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rm) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 11, 7);
-
- // A8.6.183. Possible ASR shift amount of 32...
- if (Opcode == ARM::SSATasr && ShAmt == 0)
- ShAmt = 32;
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
// Special-case handling of BFC/BFI/SBFX/UBFX.
if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
- // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI.
- MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0
- : getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
+ ++OpIdx;
+ }
uint32_t mask = 0;
if (!getBFCInvMask(insn, mask))
return false;
@@ -1498,13 +1456,55 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 5-bit immediate field Inst{11-7}.
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
- MI.addOperand(MCOperand::CreateImm(ShiftAmt));
+ ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+ if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::lsl;
+ else if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::asr;
+ getImmShiftSE(Opc, ShiftAmt);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
++OpIdx;
}
return true;
}
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ unsigned Pos = slice(insn, 20, 16);
+ if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 11, 7);
+ if (ShAmt == 0) {
+ // A8.6.183. Possible ASR shift amount of 32...
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
// Extend instructions.
// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
// The 2nd operand register is Rn and the 3rd operand regsiter is Rm for the
@@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
- bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false;
+ bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd for operand 0.
@@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VFP Load/Store Multiple Instructions.
// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
-// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is
+// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is
// followed by a reglist of either DPR(s) or SPR(s).
//
// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
@@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(Base));
- // Next comes the AM5 Opcode.
+ // Next comes the AM4 Opcode.
ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
// Must be either "ia" or "db" submode.
if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
- DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n");
+ DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
return false;
}
-
- unsigned char Imm8 = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
// Handling the two predicate operands before the reglist.
int64_t CondVal = insn >> ARMII::CondShift;
@@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx += 4;
bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
- Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false;
+ Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd.
unsigned RegD = decodeVFPRd(insn, isSPVFP);
// Fill the variadic part of reglist.
+ unsigned char Imm8 = insn & 0xFF;
unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
for (unsigned i = 0; i < Regs; ++i) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
@@ -2244,9 +2243,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// We have homogeneous NEON registers for Load/Store.
unsigned RegClass = 0;
+ bool DRegPair = UseDRegPair(Opcode);
// Double-spaced registers have increments of 2.
- unsigned Inc = DblSpaced ? 2 : 1;
+ unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1;
unsigned Rn = decodeRn(insn);
unsigned Rm = decodeRm(insn);
@@ -2292,8 +2292,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
RegClass = OpInfo[OpIdx].RegClass;
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2312,8 +2311,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2351,6 +2349,11 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
+ // Accessing registers past the end of the NEON register file is not
+ // defined.
+ if (Rd > 32)
+ return false;
+
return true;
}
@@ -2423,10 +2426,14 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
break;
case ARM::VMOVv4i16:
case ARM::VMOVv8i16:
+ case ARM::VMVNv4i16:
+ case ARM::VMVNv8i16:
esize = ESize16;
break;
case ARM::VMOVv2i32:
case ARM::VMOVv4i32:
+ case ARM::VMVNv2i32:
+ case ARM::VMVNv4i32:
esize = ESize32;
break;
case ARM::VMOVv1i64:
@@ -2944,7 +2951,7 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.6.49 ISB
static inline bool MemBarrierInstr(uint32_t insn) {
unsigned op7_4 = slice(insn, 7, 4);
- if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6))
+ if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
return true;
return false;
@@ -3001,8 +3008,15 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- if (MemBarrierInstr(insn))
+ if (MemBarrierInstr(insn)) {
+ // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care
+ // of within the generic ARMBasicMCBuilder::BuildIt() method.
+ //
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
return true;
+ }
switch (Opcode) {
case ARM::CLREX:
@@ -3073,6 +3087,7 @@ static const DisassembleFP FuncPtrs[] = {
&DisassembleLdStMulFrm,
&DisassembleLdStExFrm,
&DisassembleArithMiscFrm,
+ &DisassembleSatFrm,
&DisassembleExtFrm,
&DisassembleVFPUnaryFrm,
&DisassembleVFPBinaryFrm,
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index 7d21256a14f9..9c30d332d1f2 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -23,7 +23,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
#include "ARMDisassembler.h"
namespace llvm {
@@ -53,36 +54,35 @@ public:
ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_EXTFRM, 13) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \
- ENTRY(ARM_FORMAT_THUMBFRM, 24) \
- ENTRY(ARM_FORMAT_NEONFRM, 25) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \
- ENTRY(ARM_FORMAT_MISCFRM, 29) \
- ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \
- ENTRY(ARM_FORMAT_NLdSt, 31) \
- ENTRY(ARM_FORMAT_N1RegModImm, 32) \
- ENTRY(ARM_FORMAT_N2Reg, 33) \
- ENTRY(ARM_FORMAT_NVCVT, 34) \
- ENTRY(ARM_FORMAT_NVecDupLn, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 36) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 37) \
- ENTRY(ARM_FORMAT_N3Reg, 38) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 39) \
- ENTRY(ARM_FORMAT_NVecExtract, 40) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 41) \
- ENTRY(ARM_FORMAT_NVTBL, 42)
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
// ARM instruction format specifies the encoding used by the instruction.
#define ENTRY(n, v) n = v,
@@ -126,8 +126,8 @@ static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
}
/// Utility function for setting [From, To] bits to Val for a uint32_t.
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
- uint32_t Val) {
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+ unsigned Val) {
assert(From < 32 && To < 32 && From >= To);
uint32_t Mask = ((1 << (From - To + 1)) - 1);
Bits &= ~(Mask << To);
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b7a0bf6fdb9..112817b13cf9 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -103,7 +103,7 @@ static inline unsigned getT1Cond(uint32_t insn) {
}
static inline bool IsGPR(unsigned RegClass) {
- return RegClass == ARM::GPRRegClassID;
+ return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
}
// Utilities for 32-bit Thumb instructions.
@@ -220,7 +220,7 @@ static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
switch (bits2) {
default: assert(0 && "No such value");
case 0:
- ShOp = ARM_AM::lsl;
+ ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
return imm5;
case 1:
ShOp = ARM_AM::lsr;
@@ -1324,7 +1324,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
&& OpInfo[1].RegClass == ARM::GPRRegClassID
&& OpInfo[2].RegClass < 0
&& OpInfo[3].RegClass < 0
- && "Exactlt 4 operands expect and first two as reg operands");
+ && "Exactly 4 operands expect and first two as reg operands");
// Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
@@ -1338,17 +1338,20 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
+ OpInfo[0].RegClass == ARM::rGPRRegClassID)
+ && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
+ OpInfo[1].RegClass == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first two as reg operands");
- bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID);
+ bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
+ OpInfo[2].RegClass == ARM::rGPRRegClassID));
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the constant shift specifier.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, OpInfo[0].RegClass,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1359,7 +1362,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
} else if (!NoDstReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
decodeRn(insn))));
++OpIdx;
} else {
@@ -1368,7 +1371,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeRm(insn))));
++OpIdx;
@@ -1386,14 +1389,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned imm5 = getShiftAmtBits(insn);
ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
-
- // PKHBT/PKHTB are special in that we need the decodeImmShift() call to
- // decode the shift amount from raw imm5 and bits2, but we DO NOT need
- // to encode the ShOp, as it's in the asm string already.
- if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB)
- MI.addOperand(MCOperand::CreateImm(ShAmt));
- else
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
}
++OpIdx;
}
@@ -1416,16 +1412,20 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the modified immediate.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, RdRegClassID,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1434,7 +1434,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
return false;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1455,30 +1455,48 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
switch (Opcode) {
- case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16:
- case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16:
+ case ARM::t2SSAT: case ARM::t2SSAT16:
+ case ARM::t2USAT: case ARM::t2USAT16:
return true;
default:
return false;
}
}
-static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::t2SSATlsl:
- case ARM::t2SSATasr:
- return slice(insn, 4, 0) + 1;
- case ARM::t2SSAT16:
- return slice(insn, 3, 0) + 1;
- case ARM::t2USATlsl:
- case ARM::t2USATasr:
- return slice(insn, 4, 0);
- case ARM::t2USAT16:
- return slice(insn, 3, 0);
- default:
- assert(0 && "Unexpected opcode");
- return 0;
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned &NumOpsAdded, BO B) {
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble the register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ unsigned Pos = slice(insn, 4, 0);
+ if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+ ARM_AM::asr : ARM_AM::lsl);
+ // Inst{14-12:7-6} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+ if (ShAmt == 0) {
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
}
+ return true;
}
// A6.3.3 Data-processing (plain binary immediate)
@@ -1492,11 +1510,6 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
// o t2SBFX (SBFX): Rs Rn lsb width
// o t2UBFX (UBFX): Rs Rn lsb width
// o t2BFI (BFI): Rs Rn lsb width
-//
-// [Signed|Unsigned] Saturate [16]
-//
-// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt
-// o t2SSAT16, t2USAT16: Rs sat_pos Rn
static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1506,41 +1519,21 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
// Build the register operand(s), followed by the immediate(s).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
decodeRs(insn))));
++OpIdx;
- // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd.
- if (Thumb2SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rn) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{14-12:7-6} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
if (TwoReg) {
assert(NumOps >= 3 && "Expect >= 3 operands");
int Idx;
@@ -1549,12 +1542,19 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MI.getOperand(Idx));
} else {
// Add src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
}
++OpIdx;
}
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1567,7 +1567,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- else if (Opcode == ARM::t2BFC) {
+ else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
uint32_t mask = 0;
if (getBitfieldInvMask(insn, mask))
MI.addOperand(MCOperand::CreateImm(mask));
@@ -1575,17 +1575,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
return false;
} else {
// Handle the case of: lsb width
- assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX ||
- Opcode == ARM::t2BFI) && "Unexpected opcode");
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
- if (Opcode == ARM::t2BFI) {
- if (getMsb(insn) < getLsb(insn)) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1));
- } else
- MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
++OpIdx;
}
@@ -1618,8 +1611,8 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) {
// A8.6.26
// t2BXJ -> Rn
//
-// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants),
-// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX
+// Miscellaneous control: t2DMBsy (and its t2DMB variants),
+// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX
// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
//
// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
@@ -1959,25 +1952,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
"Expect >= 2 operands and first two as reg operands");
// Build the register operands, followed by the optional rotation amount.
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -2009,26 +2002,26 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
// Build the register operands.
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
NumOpsAdded = FourReg ? 4 : 3;
@@ -2054,26 +2047,26 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
// Build the register operands.
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
@@ -2152,22 +2145,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
break;
case 2:
if (op == 0) {
- if (slice(op2, 5, 5) == 0) {
+ if (slice(op2, 5, 5) == 0)
// Data-processing (modified immediate)
return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
B);
- } else {
- // Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- } else {
- // Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
+ if (Thumb2SaturateOpcode(Opcode))
+ return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
- break;
+ // Data-processing (plain binary immediate)
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ // Branches and miscellaneous control on page A6-20.
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
case 3:
switch (slice(op2, 6, 5)) {
case 0:
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 9e3ff29e07c4..b3fcfaf6bda7 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -14,10 +14,11 @@ TARGET = ARM
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
- ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+ ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtarget.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenDecoderTables.inc ARMGenEDInfo.inc
+ ARMGenDecoderTables.inc ARMGenEDInfo.inc \
+ ARMGenFastISel.inc
DIRS = AsmPrinter AsmParser Disassembler TargetInfo
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index bbdd3c7f7c3e..97e54bfaed9e 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -24,7 +24,7 @@ STATISTIC(NumVMovs, "Number of reg-reg moves converted");
namespace {
struct NEONMoveFixPass : public MachineFunctionPass {
static char ID;
- NEONMoveFixPass() : MachineFunctionPass(&ID) {}
+ NEONMoveFixPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index f67717cdd56f..3407ac6fe08e 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -23,7 +23,7 @@ namespace {
public:
static char ID;
- NEONPreAllocPass() : MachineFunctionPass(&ID) {}
+ NEONPreAllocPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
- case ARM::VLD1q8:
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD2d8:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
@@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- FirstOpnd = 0;
- NumRegs = 4;
- return true;
-
case ARM::VLD2LNq16:
case ARM::VLD2LNq32:
FirstOpnd = 0;
@@ -88,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD3d8:
- case ARM::VLD3d16:
- case ARM::VLD3d32:
- case ARM::VLD1d64T:
case ARM::VLD3LNd8:
case ARM::VLD3LNd16:
case ARM::VLD3LNd32:
@@ -99,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VLD3q8_UPD:
- case ARM::VLD3q16_UPD:
- case ARM::VLD3q32_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD3q8odd_UPD:
- case ARM::VLD3q16odd_UPD:
- case ARM::VLD3q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD3LNq16:
case ARM::VLD3LNq32:
FirstOpnd = 0;
@@ -133,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD4d8:
- case ARM::VLD4d16:
- case ARM::VLD4d32:
- case ARM::VLD1d64Q:
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
@@ -144,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VLD4q8_UPD:
- case ARM::VLD4q16_UPD:
- case ARM::VLD4q32_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD4q8odd_UPD:
- case ARM::VLD4q16odd_UPD:
- case ARM::VLD4q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD4LNq16:
case ARM::VLD4LNq32:
FirstOpnd = 0;
@@ -178,13 +120,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@@ -192,13 +127,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
-
case ARM::VST2LNq16:
case ARM::VST2LNq32:
FirstOpnd = 2;
@@ -215,10 +143,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST3d8:
- case ARM::VST3d16:
- case ARM::VST3d32:
- case ARM::VST1d64T:
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
@@ -226,24 +150,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VST3q8_UPD:
- case ARM::VST3q16_UPD:
- case ARM::VST3q32_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST3q8odd_UPD:
- case ARM::VST3q16odd_UPD:
- case ARM::VST3q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST3LNq16:
case ARM::VST3LNq32:
FirstOpnd = 2;
@@ -260,10 +166,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST4d8:
- case ARM::VST4d16:
- case ARM::VST4d32:
- case ARM::VST1d64Q:
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
@@ -271,24 +173,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VST4q8_UPD:
- case ARM::VST4q16_UPD:
- case ARM::VST4q32_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST4q8odd_UPD:
- case ARM::VST4q16odd_UPD:
- case ARM::VST4q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST4LNq16:
case ARM::VST4LNq32:
FirstOpnd = 2;
@@ -468,7 +352,34 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
continue;
if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
- llvm_unreachable("expected a REG_SEQUENCE");
+
+ MachineBasicBlock::iterator NextI = llvm::next(MBBI);
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+
+ // For now, just assign a fixed set of adjacent registers.
+ // This leaves plenty of room for future improvements.
+ static const unsigned NEONDRegs[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7
+ };
+ MO.setReg(NEONDRegs[Offset + R * Stride]);
+
+ if (MO.isUse()) {
+ // Insert a copy from VirtReg.
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg())
+ .addReg(VirtReg, getKillRegState(MO.isKill()));
+ MO.setIsKill();
+ } else if (MO.isDef() && !MO.isDead()) {
+ // Add a copy to VirtReg.
+ BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg)
+ .addReg(MO.getReg());
+ }
+ }
}
return Modified;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 0cb8ff01181d..9fc3fb92cb2c 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -611,27 +611,6 @@ constant which was already loaded). Not sure what's necessary to do that.
//===---------------------------------------------------------------------===//
-Given the following on ARMv7:
-int test1(int A, int B) {
- return (A&-8388481)|(B&8388480);
-}
-
-We currently generate:
- bfc r0, #7, #16
- movw r2, #:lower16:8388480
- movt r2, #:upper16:8388480
- and r1, r1, r2
- orr r0, r1, r0
- bx lr
-
-The following is much shorter:
- lsr r1, r1, #7
- bfi r0, r1, #7, #16
- bx lr
-
-
-//===---------------------------------------------------------------------===//
-
The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
int a(int x) { return __builtin_bswap32(x); }
@@ -657,3 +636,24 @@ A custom Thumb version would also be a slight improvement over the generic
version.
//===---------------------------------------------------------------------===//
+
+Consider the following simple C code:
+
+void foo(unsigned char *a, unsigned char *b, int *c) {
+ if ((*a | *b) == 0) *c = 0;
+}
+
+currently llvm-gcc generates something like this (nice branchless code I'd say):
+
+ ldrb r0, [r0]
+ ldrb r1, [r1]
+ orr r0, r1, r0
+ tst r0, #255
+ moveq r0, #0
+ streq r0, [r2]
+ bx lr
+
+Note that both "tst" and "moveq" are redundant.
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 39b70b43b23f..a21a3da10bda 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -68,7 +68,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -363,107 +363,19 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
MI.RemoveOperand(Op);
}
-int Thumb1RegisterInfo::
-rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const
-{
- // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo
- // version then can pull out Thumb1 specific parts here
- return 0;
-}
-
-/// saveScavengerRegister - Spill the register so it can be used by the
-/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
- // Thumb1 can't use the emergency spill slot on the stack because
- // ldr/str immediate offsets must be positive, and if we're referencing
- // off the frame pointer (if, for example, there are alloca() calls in
- // the function, the offset will be negative. Use R12 instead since that's
- // a call clobbered register that we know won't be used in Thumb1 mode.
- DebugLoc DL;
- BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
-
- // The UseMI is where we would like to restore the register. If there's
- // interference with R12 before then, however, we'll need to restore it
- // before that instead and adjust the UseMI.
- bool done = false;
- for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
- if (II->isDebugValue())
- continue;
- // If this instruction affects R12, adjust our restore point.
- for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = II->getOperand(i);
- if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- if (MO.getReg() == ARM::R12) {
- UseMI = II;
- done = true;
- break;
- }
- }
- }
- // Restore the register from R12
- BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
-
- return true;
-}
-
-unsigned
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
- unsigned VReg = 0;
- unsigned i = 0;
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
-
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
- // There are alloca()'s in this function, must reference off the frame
- // pointer instead.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- }
-
- // Special handling of dbg_value instructions.
- if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
- }
-
unsigned Opcode = MI.getOpcode();
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
if (Opcode == ARM::tADDrSPi) {
- Offset += MI.getOperand(i+1).getImm();
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
// Can't use tADDrSPi if it's based off the frame pointer.
unsigned NumBits = 0;
@@ -483,12 +395,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset and remaining explicit predicate operands.
- do MI.RemoveOperand(i+1);
- while (MI.getNumOperands() > i+1 &&
- (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm()));
- return 0;
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1 &&
+ (!MI.getOperand(FrameRegIdx+1).isReg() ||
+ !MI.getOperand(FrameRegIdx+1).isImm()));
+ return true;
}
// Common case: small offset, fits into instruction.
@@ -496,15 +409,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
}
- return 0;
+ return true;
}
unsigned DestReg = MI.getOperand(0).getReg();
@@ -516,7 +429,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
*this, dl);
MBB.erase(II);
- return 0;
+ return true;
}
if (Offset > 0) {
@@ -524,12 +437,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Mask);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
}
Offset = (Offset - Mask * Scale);
MachineBasicBlock::iterator NII = llvm::next(II);
@@ -542,14 +455,14 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
if (Opcode == ARM::tADDi3) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
}
- return 0;
+ return true;
} else {
unsigned ImmIdx = 0;
int InstrOffs = 0;
@@ -557,7 +470,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Scale = 1;
switch (AddrMode) {
case ARMII::AddrModeT1_s: {
- ImmIdx = i+1;
+ ImmIdx = FrameRegIdx+1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = (FrameReg == ARM::SP) ? 8 : 5;
Scale = 4;
@@ -577,9 +490,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Mask = (1 << NumBits) - 1;
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with sp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
- return 0;
+ return true;
}
bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
@@ -600,12 +513,126 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset &= ~(Mask*Scale);
}
}
+ return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert (Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function, the offset will be negative. Use R12 instead since that's
+ // a call clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
+ addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
assert(Offset && "This code isn't needed if offset already handled!");
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
@@ -637,11 +664,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(0, false));
} else if (Desc.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
- assert (Value && "Frame index virtual allocated, but Value arg is NULL!");
bool UseRR = false;
- bool TrackVReg = true;
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
if (Opcode == ARM::tSpill) {
if (FrameReg == ARM::SP)
@@ -650,7 +673,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
UseRR = true;
- TrackVReg = false;
}
} else
emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
@@ -661,8 +683,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
else // tSTR has an extra register operand.
MI.addOperand(MachineOperand::CreateReg(0, false));
- if (!ReuseFrameIndexVals || !TrackVReg)
- VReg = 0;
} else
assert(false && "Unexpected opcode!");
@@ -671,7 +691,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
- return VReg;
}
void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
@@ -742,11 +761,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ // Adjust FP so it point to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
+ AFI->setShouldRestoreSPFromFP(true);
}
// Determine starting offsets of spill areas.
@@ -764,14 +783,20 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
}
- if (STI.isTargetELF() && hasFP(MF)) {
+ if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
- }
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -828,7 +853,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- if (hasFP(MF)) {
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9a0308afa20c..c578054a5d71 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,27 +38,27 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
- // could not be handled directly in MI.
- int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc,
- unsigned SUBriOpc) const;
-
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator &UseMI,
const TargetRegisterClass *RC,
unsigned Reg) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index cd15bbed9f23..45e693744b80 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -27,7 +27,7 @@ namespace {
public:
static char ID;
- Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -91,35 +91,53 @@ static void TrackDefUses(MachineInstr *MI,
}
}
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2gpr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
bool
Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
SmallSet<unsigned, 4> &Defs,
SmallSet<unsigned, 4> &Uses) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
- "Sub-register indices still around?");
- // llvm models select's as two-address instructions. That means a copy
- // is inserted before a t2MOVccr, etc. If the copy is scheduled in
- // between selects we would end up creating multiple IT blocks.
-
- // First check if it's safe to move it.
- if (Uses.count(DstReg) || Defs.count(SrcReg))
- return false;
-
- // Then peek at the next instruction to see if it's predicated on CC or OCC.
- // If not, then there is nothing to be gained by moving the copy.
- MachineBasicBlock::iterator I = MI; ++I;
- MachineBasicBlock::iterator E = MI->getParent()->end();
- while (I != E && I->isDebugValue())
- ++I;
- if (I != E) {
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
- if (NCC == CC || NCC == OCC)
- return true;
- }
+ if (!isCopy(MI))
+ return false;
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
}
return false;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index ee517279c9d7..442f41da8a2d 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -147,8 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -173,8 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ba392f36d946..0c3962dd123d 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -173,7 +173,7 @@ namespace {
char Thumb2SizeReduce::ID = 0;
}
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -315,6 +315,18 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
return false;
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+ if (!isOK)
+ return false;
+
OpNum = 0;
isLdStMul = true;
break;
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
index 001656e0121a..376811709536 100644
--- a/lib/Target/Alpha/AlphaBranchSelector.cpp
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
namespace {
struct AlphaBSel : public MachineFunctionPass {
static char ID;
- AlphaBSel() : MachineFunctionPass(&ID) {}
+ AlphaBSel() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
index a6c6f52704f6..3aec07035d74 100644
--- a/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -34,7 +34,7 @@ namespace {
public:
static char ID;
- AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID),
+ AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID),
MCE(mce) {}
/// getBinaryCodeForInstr - This function, generated by the
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index d526dc0827b2..d197bd15ef9c 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -113,8 +113,8 @@ namespace {
static uint64_t getNearPower2(uint64_t x) {
if (!x) return 0;
unsigned at = CountLeadingZeros_64(x);
- uint64_t complow = 1 << (63 - at);
- uint64_t comphigh = 1 << (64 - at);
+ uint64_t complow = 1ULL << (63 - at);
+ uint64_t comphigh = 1ULL << (64 - at);
//cerr << x << ":" << complow << ":" << comphigh << "\n";
if (abs64(complow - x) <= abs64(comphigh - x))
return complow;
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index ad625a269417..5a2f5610fdb4 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -27,32 +27,6 @@ AlphaInstrInfo::AlphaInstrInfo()
RI(*this) { }
-bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg, unsigned& destReg,
- unsigned& SrcSR, unsigned& DstSR) const {
- unsigned oc = MI.getOpcode();
- if (oc == Alpha::BISr ||
- oc == Alpha::CPYSS ||
- oc == Alpha::CPYST ||
- oc == Alpha::CPYSSt ||
- oc == Alpha::CPYSTs) {
- // or r1, r2, r2
- // cpys(s|t) r1 r2 r2
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid Alpha BIS instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- SrcSR = DstSR = 0;
- return true;
- }
- }
- return false;
-}
-
unsigned
AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index e20e8323b64e..ee6077a4a01a 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -30,12 +30,6 @@ public:
///
virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
index 34be470f03e3..85fbfd1affe2 100644
--- a/lib/Target/Alpha/AlphaLLRP.cpp
+++ b/lib/Target/Alpha/AlphaLLRP.cpp
@@ -39,7 +39,7 @@ namespace {
static char ID;
AlphaLLRPPass(AlphaTargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm) { }
virtual const char *getPassName() const {
return "Alpha NOP inserter";
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index dc9d935ec047..327ddb4d9a72 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -137,10 +137,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
//variable locals
//<- SP
-unsigned
+void
AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -185,7 +184,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else {
MI.getOperand(i).ChangeToImmediate(Offset);
}
- return 0;
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index f9fd87a63737..b164979a6311 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -38,9 +38,8 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
//void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 9f4aff6fd5f5..5428cb96173b 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -53,8 +53,6 @@ namespace {
void printOp(const MachineOperand &MO, raw_ostream &O);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printBaseOffsetPair(const MachineInstr *MI, int i, raw_ostream &O,
- bool brackets=true);
virtual void EmitFunctionBodyStart();
virtual void EmitFunctionBodyEnd();
void EmitStartOfAsmFile(Module &M);
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index a74d42d59549..e50d57a31b6e 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -28,34 +28,6 @@ BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
RI(ST, *this),
Subtarget(ST) {}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool BlackfinInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
- unsigned &DstReg,
- unsigned &SrcSR,
- unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
- switch (MI.getOpcode()) {
- case BF::MOVE:
- case BF::MOVE_ncccc:
- case BF::MOVE_ccncc:
- case BF::MOVECC_zext:
- case BF::MOVECC_nz:
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- case BF::SLL16i:
- if (MI.getOperand(2).getImm()!=0)
- return false;
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- default:
- return false;
- }
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index 6c3591707269..fdc1029da588 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -30,10 +30,6 @@ namespace llvm {
/// always be able to get register info as well (through this method).
virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 06e95de1587f..a51831263e90 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -190,10 +190,9 @@ static unsigned findScratchRegister(MachineBasicBlock::iterator II,
return Reg;
}
-unsigned
+void
BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -230,20 +229,20 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.setDesc(TII.get(isStore
? BF::STORE32p_uimm6m4
: BF::LOAD32p_uimm6m4));
- return 0;
+ return;
}
if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32fp_nimm7m4
: BF::LOAD32fp_nimm7m4));
MI.getOperand(FIPos+1).setImm(-Offset);
- return 0;
+ return;
}
if (isInt<18>(Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32p_imm18m4
: BF::LOAD32p_imm18m4));
- return 0;
+ return;
}
// Use RegScavenger to calculate proper offset...
MI.dump();
@@ -328,7 +327,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
llvm_unreachable("Cannot eliminate frame index");
break;
}
- return 0;
}
void BlackfinRegisterInfo::
@@ -344,10 +342,6 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
-void BlackfinRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-}
-
// Emit a prologue that sets up a stack frame.
// On function entry, R0-R2 and P0 may hold arguments.
// R3, P1, and P2 may be used as scratch registers
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index ead0b4a73c83..bb83c34f8003 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -51,15 +51,12 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index e8d8474b5be8..270fff6064ad 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -73,7 +73,7 @@ namespace {
public:
static char ID;
CBackendNameAllUsedStructsAndMergeFunctions()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<FindUsedTypes>();
}
@@ -110,7 +110,7 @@ namespace {
public:
static char ID;
explicit CWriter(formatted_raw_ostream &o)
- : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0),
+ : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
NextAnonValueNumber(0) {
FPCounter = 0;
@@ -199,7 +199,6 @@ namespace {
void lowerIntrinsics(Function &F);
- void printModule(Module *M);
void printModuleTypes(const TypeSymbolTable &ST);
void printContainedStructs(const Type *Ty, std::set<const Type *> &);
void printFloatingPointConstants(Function &F);
@@ -1300,6 +1299,13 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
}
std::string CWriter::GetValueName(const Value *Operand) {
+
+ // Resolve potential alias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
+ if (const Value *V = GA->resolveAliasedGlobal(false))
+ Operand = V;
+ }
+
// Mangle globals with the standard mangler interface for LLC compatibility.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
SmallString<128> Str;
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index ec2f663908f6..04fa2ae866d6 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -1,4 +1,4 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===//
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,16 +19,17 @@ class CCIfSubtarget<string F, CCAction A>
// Return Value Calling Convention
//===----------------------------------------------------------------------===//
-// Return-value convention for Cell SPU: Everything can be passed back via $3:
+// Return-value convention for Cell SPU: return value to be passed in reg 3-74
def RetCC_SPU : CallingConv<[
- CCIfType<[i8], CCAssignToReg<[R3]>>,
- CCIfType<[i16], CCAssignToReg<[R3]>>,
- CCIfType<[i32], CCAssignToReg<[R3]>>,
- CCIfType<[i64], CCAssignToReg<[R3]>>,
- CCIfType<[i128], CCAssignToReg<[R3]>>,
- CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
- CCIfType<[v2i32], CCAssignToReg<[R3]>>
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
]>;
@@ -45,8 +46,7 @@ def CCC_SPU : CallingConv<[
R39, R40, R41, R42, R43, R44, R45, R46, R47,
R48, R49, R50, R51, R52, R53, R54, R55, R56,
R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9b8c2ddd0635..2f1598441f5a 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -41,13 +41,6 @@ using namespace llvm;
namespace {
//! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
bool
- isI64IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
- bool
isI32IntS10Immediate(ConstantSDNode *CN)
{
return isInt<10>(CN->getSExtValue());
@@ -67,14 +60,6 @@ namespace {
return isInt<10>(CN->getSExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntS10Immediate(SDNode *N)
- {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- return (CN != 0 && isI16IntS10Immediate(CN));
- }
-
//! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
bool
isI16IntU10Immediate(ConstantSDNode *CN)
@@ -82,14 +67,6 @@ namespace {
return isUInt<10>((short) CN->getZExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntU10Immediate(SDNode *N)
- {
- return (N->getOpcode() == ISD::Constant
- && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
- }
-
//! ConstantSDNode predicate for signed 16-bit values
/*!
\arg CN The constant SelectionDAG node holding the value
@@ -119,14 +96,6 @@ namespace {
return false;
}
- //! SDNode predicate for signed 16-bit values.
- bool
- isIntS16Immediate(SDNode *N, short &Imm)
- {
- return (N->getOpcode() == ISD::Constant
- && isIntS16Immediate(cast<ConstantSDNode>(N), Imm));
- }
-
//! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
static bool
isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
@@ -142,16 +111,6 @@ namespace {
return false;
}
- bool
- isHighLow(const SDValue &Op)
- {
- return (Op.getOpcode() == SPUISD::IndirectAddr
- && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
- && Op.getOperand(1).getOpcode() == SPUISD::Lo)
- || (Op.getOperand(0).getOpcode() == SPUISD::Lo
- && Op.getOperand(1).getOpcode() == SPUISD::Hi)));
- }
-
//===------------------------------------------------------------------===//
//! EVT to "useful stuff" mapping structure:
@@ -607,7 +566,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
return true;
} else if (Opc == ISD::Register
||Opc == ISD::CopyFromReg
- ||Opc == ISD::UNDEF) {
+ ||Opc == ISD::UNDEF
+ ||Opc == ISD::Constant) {
unsigned OpOpc = Op->getOpcode();
if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ece19b9b89f6..46f31899be0c 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -426,9 +426,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
- // "Odd size" vector classes that we're willing to support:
- addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
-
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
@@ -751,7 +748,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
if (alignment == 16) {
ConstantSDNode *CN;
-
// Special cases for a known aligned load to simplify the base pointer
// and insertion byte:
if (basePtr.getOpcode() == ISD::ADD
@@ -775,6 +771,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
}
} else {
// Unaligned load: must be more pessimistic about addressing modes:
@@ -811,8 +810,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
DAG.getConstant(0, PtrVT));
}
- // Re-emit as a v16i8 vector load
- alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ // Load the memory to which to store.
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
SN->getSrcValue(), SN->getSrcValueOffset(),
SN->isVolatile(), SN->isNonTemporal(), 16);
@@ -843,10 +842,10 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
}
#endif
- SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
- SDValue vectorizeOp =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
vectorizeOp, alignLoadVec,
@@ -1325,41 +1324,23 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Ins.empty())
return Chain;
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
// If the call has results, copy the values out of the ret val registers.
- switch (Ins[0].VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i32:
- if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
- MVT::i32, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- Chain.getValue(2)).getValue(1);
- InVals.push_back(Chain.getValue(0));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- }
- break;
- case MVT::i8:
- case MVT::i16:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- }
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
return Chain;
}
@@ -1621,10 +1602,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
- case MVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
- }
case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
@@ -1748,11 +1725,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element.
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
EVT VecVT = V1.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
- unsigned V2Elt = 0;
+ unsigned V2EltOffset = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
@@ -1785,9 +1763,13 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
- if (1 >= (++EltsFromV2)) {
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- }
+ // TODO: optimize for the monotonic case when several consecutive
+ // elements are taken form V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
@@ -1823,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// R1 ($sp) is used here only as it is guaranteed to have last bits zero
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2Elt, MVT::i32));
+ DAG.getConstant(V2EltOffset, MVT::i32));
SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);
@@ -1847,7 +1829,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
-
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
@@ -1997,7 +1978,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
@@ -2072,21 +2053,25 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
// use 0 when the lane to insert to is 'undef'
- int64_t Idx=0;
+ int64_t Offset=0;
if (IdxOp.getOpcode() != ISD::UNDEF) {
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Idx = (CN->getSExtValue());
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
}
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Idx, PtrVT));
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
SDValue result =
DAG.getNode(SPUISD::SHUFB, dl, VT,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 69aa0887bd77..26d6b4f25ef1 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -54,148 +54,6 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
RI(*TM.getSubtargetImpl(), *this)
{ /* NOP */ }
-bool
-SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg,
- unsigned& destReg,
- unsigned& SrcSR, unsigned& DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- switch (MI.getOpcode()) {
- default:
- break;
- case SPU::ORIv4i32:
- case SPU::ORIr32:
- case SPU::ORHIv8i16:
- case SPU::ORHIr16:
- case SPU::ORHIi8i16:
- case SPU::ORBIv16i8:
- case SPU::ORBIr8:
- case SPU::ORIi16i32:
- case SPU::ORIi8i32:
- case SPU::AHIvec:
- case SPU::AHIr16:
- case SPU::AIv4i32:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!");
- if (MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::AIr32:
- assert(MI.getNumOperands() == 3 &&
- "wrong number of operands to AIr32");
- if (MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- (MI.getOperand(2).isImm() &&
- MI.getOperand(2).getImm() == 0)) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::LRr8:
- case SPU::LRr16:
- case SPU::LRr32:
- case SPU::LRf32:
- case SPU::LRr64:
- case SPU::LRf64:
- case SPU::LRr128:
- case SPU::LRv16i8:
- case SPU::LRv8i16:
- case SPU::LRv4i32:
- case SPU::LRv4f32:
- case SPU::LRv2i64:
- case SPU::LRv2f64:
- case SPU::ORv16i8_i8:
- case SPU::ORv8i16_i16:
- case SPU::ORv4i32_i32:
- case SPU::ORv2i64_i64:
- case SPU::ORv4f32_f32:
- case SPU::ORv2f64_f64:
- case SPU::ORi8_v16i8:
- case SPU::ORi16_v8i16:
- case SPU::ORi32_v4i32:
- case SPU::ORi64_v2i64:
- case SPU::ORf32_v4f32:
- case SPU::ORf64_v2f64:
-/*
- case SPU::ORi128_r64:
- case SPU::ORi128_f64:
- case SPU::ORi128_r32:
- case SPU::ORi128_f32:
- case SPU::ORi128_r16:
- case SPU::ORi128_r8:
-*/
- case SPU::ORi128_vec:
-/*
- case SPU::ORr64_i128:
- case SPU::ORf64_i128:
- case SPU::ORr32_i128:
- case SPU::ORf32_i128:
- case SPU::ORr16_i128:
- case SPU::ORr8_i128:
-*/
- case SPU::ORvec_i128:
-/*
- case SPU::ORr16_r32:
- case SPU::ORr8_r32:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORr32_r16:
- case SPU::ORr32_r8:
- case SPU::ORr16_r64:
- case SPU::ORr8_r64:
- case SPU::ORr64_r16:
- case SPU::ORr64_r8:
-*/
- case SPU::ORr64_r32:
- case SPU::ORr32_r64:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORf64_r64:
- case SPU::ORr64_f64: {
- assert(MI.getNumOperands() == 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid SPU OR<type>_<vec> or LR instruction!");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- break;
- }
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORr128:
- case SPU::ORf32:
- case SPU::ORf64:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid SPU OR(vec|r32|r64|gprc) instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- }
-
- return false;
-}
-
unsigned
SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index fbb173318148..191e55d0ca61 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -32,12 +32,6 @@ namespace llvm {
///
virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index a7fb14c26a76..ca0fe00e37f8 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -62,8 +62,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadDFormVec<v4f32>;
def v2f64: LoadDFormVec<v2f64>;
- def v2i32: LoadDFormVec<v2i32>;
-
def r128: LoadDForm<GPRC>;
def r64: LoadDForm<R64C>;
def r32: LoadDForm<R32C>;
@@ -96,8 +94,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadAFormVec<v4f32>;
def v2f64: LoadAFormVec<v2f64>;
- def v2i32: LoadAFormVec<v2i32>;
-
def r128: LoadAForm<GPRC>;
def r64: LoadAForm<R64C>;
def r32: LoadAForm<R32C>;
@@ -130,8 +126,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadXFormVec<v4f32>;
def v2f64: LoadXFormVec<v2f64>;
- def v2i32: LoadXFormVec<v2i32>;
-
def r128: LoadXForm<GPRC>;
def r64: LoadXForm<R64C>;
def r32: LoadXForm<R32C>;
@@ -180,8 +174,6 @@ multiclass StoreDForms
def v4f32: StoreDFormVec<v4f32>;
def v2f64: StoreDFormVec<v2f64>;
- def v2i32: StoreDFormVec<v2i32>;
-
def r128: StoreDForm<GPRC>;
def r64: StoreDForm<R64C>;
def r32: StoreDForm<R32C>;
@@ -212,8 +204,6 @@ multiclass StoreAForms
def v4f32: StoreAFormVec<v4f32>;
def v2f64: StoreAFormVec<v2f64>;
- def v2i32: StoreAFormVec<v2i32>;
-
def r128: StoreAForm<GPRC>;
def r64: StoreAForm<R64C>;
def r32: StoreAForm<R32C>;
@@ -246,8 +236,6 @@ multiclass StoreXForms
def v4f32: StoreXFormVec<v4f32>;
def v2f64: StoreXFormVec<v2f64>;
- def v2i32: StoreXFormVec<v2i32>;
-
def r128: StoreXForm<GPRC>;
def r64: StoreXForm<R64C>;
def r32: StoreXForm<R32C>;
@@ -607,7 +595,6 @@ class ARegInst<RegisterClass rclass>:
multiclass AddInstruction {
def v4i32: AVecInst<v4i32>;
def v16i8: AVecInst<v16i8>;
-
def r32: ARegInst<R32C>;
}
@@ -672,6 +659,7 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
@@ -1448,6 +1436,9 @@ class ORCvtGPRCVec:
class ORCvtVecGPRC:
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+class ORCvtVecVec:
+ ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
multiclass BitwiseOr
{
def v16i8: ORVecInst<v16i8>;
@@ -3894,6 +3885,79 @@ multiclass SFPSub
defm FS : SFPSub;
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Mulitply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+
+
// Floating point reciprocal estimate
class FRESTInst<dag OOL, dag IOL>:
@@ -4019,72 +4083,6 @@ def FSCRRf32 :
// status and control register read
//--------------------------------------
-// Floating point multiply instructions
-//--------------------------------------
-
-def FMv4f32:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def FMf32 :
- RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fadd (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT,
- (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Mulitply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-// = - (a * b - c)
-// = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
- RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
- RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB))))]>;
-
-//--------------------------------------
// Floating Point Conversions
// Signed conversions:
def CSiFv4f32:
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index 6216651e48a4..e1a0358abc46 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -98,12 +98,6 @@ def immU8 : PatLeaf<(imm), [{
return (N->getZExtValue() <= 0xff);
}]>;
-// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i64ImmSExt10 : PatLeaf<(imm), [{
- return isI64IntS10Immediate(N);
-}]>;
-
// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
// extended field. Used by RI10Form instructions like 'ldq'.
def i32ImmSExt10 : PatLeaf<(imm), [{
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index f7cfa42f2a95..cf718917a561 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -270,9 +270,8 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
MBB.erase(I);
}
-unsigned
+void
SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value,
RegScavenger *RS) const
{
unsigned i = 0;
@@ -328,7 +327,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
} else {
MO.ChangeToImmediate(Offset);
}
- return 0;
}
/// determineFrameLayout - Determine the size of the frame and maximum call
@@ -417,7 +415,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
}
// Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
@@ -476,7 +474,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
// Mark effective beginning of when frame pointer is ready.
MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(SPU::R1);
MachineLocation FPSrc(MachineLocation::VirtualFP);
@@ -491,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
dl = MBBI->getDebugLoc();
// Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL))
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
.addSym(MMI.getContext().CreateTempSymbol());
}
}
@@ -587,6 +585,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
case SPU::LQDr32: return SPU::LQXr32;
case SPU::LQDr128: return SPU::LQXr128;
case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
case SPU::LQDv4f32: return SPU::LQXv4f32;
case SPU::STQDr32: return SPU::STQXr32;
case SPU::STQDr128: return SPU::STQXr128;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 7a6ae6d43c7e..aedb769cb4fc 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -63,9 +63,8 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
//! Convert frame indicies into machine operands
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
//! Determine the frame's layour
void determineFrameLayout(MachineFunction &MF) const;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index bb88f2bf9a29..3e8f0979256a 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -394,7 +394,7 @@ def R8C : RegisterClass<"SPU", [i8], 128,
// The SPU's registers as vector registers:
def VECREG : RegisterClass<"SPU",
- [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64],
+ [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64],
128,
[
/* volatile register */
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 145568adcd4a..f08559f6e9f2 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -104,7 +104,7 @@ namespace {
public:
static char ID;
explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+ ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
virtual const char *getPassName() const { return "C++ backend"; }
@@ -288,6 +288,8 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
Out << "GlobalValue::LinkerPrivateLinkage"; break;
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
case GlobalValue::AvailableExternallyLinkage:
Out << "GlobalValue::AvailableExternallyLinkage "; break;
case GlobalValue::LinkOnceAnyLinkage:
@@ -471,14 +473,22 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(Nest);
HANDLE_ATTR(ReadNone);
HANDLE_ATTR(ReadOnly);
- HANDLE_ATTR(InlineHint);
HANDLE_ATTR(NoInline);
HANDLE_ATTR(AlwaysInline);
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
HANDLE_ATTR(NoCapture);
+ HANDLE_ATTR(NoRedZone);
+ HANDLE_ATTR(NoImplicitFloat);
+ HANDLE_ATTR(Naked);
+ HANDLE_ATTR(InlineHint);
#undef HANDLE_ATTR
+ if (attrs & Attribute::StackAlignment)
+ Out << " | Attribute::constructStackAlignmentFromInt("
+ << Attribute::getStackAlignmentFromAttrs(attrs)
+ << ")";
+ attrs &= ~Attribute::StackAlignment;
assert(attrs == 0 && "Unhandled attribute!");
Out << ";";
nl(Out);
@@ -1404,7 +1414,8 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
}
Out << "CallInst* " << iName << " = CallInst::Create("
- << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), "
+ << opNames[call->getNumArgOperands()] << ", "
+ << iName << "_params.begin(), "
<< iName << "_params.end(), \"";
} else if (call->getNumArgOperands() == 1) {
Out << "CallInst* " << iName << " = CallInst::Create("
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index b6e4d654f4aa..f4b30ad271f1 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -65,11 +65,8 @@ namespace {
void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
- void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
void printSavedRegsBitmask(raw_ostream &OS);
- const char *emitCurrentABIString();
void emitFrameDirective();
void printInstruction(const MachineInstr *MI, raw_ostream &O);
@@ -292,13 +289,6 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
printOperand(MI, opNum, O);
}
-void MBlazeAsmPrinter::
-printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier) {
- const MachineOperand& MO = MI->getOperand(opNum);
- O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm());
-}
-
// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmPrinter() {
RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 482ddd3963fb..3815b6d0a398 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -1,4 +1,4 @@
-//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===//
+//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td
index ddd49980e0a2..8622e0d74bcd 100644
--- a/lib/Target/MBlaze/MBlazeCallingConv.td
+++ b/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -1,4 +1,4 @@
-//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===//
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 42fea2507332..b551b79b291e 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "MBlaze Delay Slot Filler";
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index c7cd5f4e44a9..e64dd0e3e2c3 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -219,7 +219,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
// Operand is a result from an ADD.
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_immSExt16(CN)) {
+ if (isUInt<16>(CN->getZExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index a48a8c972353..657b1d4940a7 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index b59999e76ae5..51584111e666 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 7d655433d4e4..28e8e4402225 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===//
+//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 6ff5825a26b6..b590c090e095 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -30,41 +30,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool MBlazeInstrInfo::
-isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- // add $dst, $src, $zero || addu $dst, $zero, $src
- // or $dst, $src, $zero || or $dst, $zero, $src
- if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) {
- if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).isReg() &&
- MI.getOperand(2).getReg() == MBlaze::R0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- // addi $dst, $src, 0
- // ori $dst, $src, 0
- if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) {
- if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index f0743705f010..b3dba0ec768c 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -173,12 +173,6 @@ public:
///
virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 3c406dda0591..e5d153474a7e 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===//
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index 82552fa4b343..a27cb5ba0dc4 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -17,17 +17,11 @@
// MBlaze intrinsic classes.
let TargetPrefix = "mblaze", isTarget = 1 in {
- class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty],
- [llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
- class MBFSL_Put_Intrinsic : Intrinsic<[],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
- class MBFSL_PutT_Intrinsic : Intrinsic<[],
- [llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 8cafa8c519c6..22b6a30470d1 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -242,9 +242,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// FrameIndex represent objects inside a abstract stack.
// We must replace FrameIndex with an stack/frame pointer
// direct reference.
-unsigned MBlazeRegisterInfo::
+void MBlazeRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const {
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
@@ -277,7 +277,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MI.getOperand(oi).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
- return 0;
}
void MBlazeRegisterInfo::
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index af97b0e2d79e..1e1fde14ab7b 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -63,9 +63,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
MachineBasicBlock::iterator I) const;
/// Stack Frame Processing Methods
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index d0a1e7556c43..5e935103389e 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===//
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 1fec9e694005..4a65542a447c 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===//
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt
deleted file mode 100644
index b1d47ef05ec5..000000000000
--- a/lib/Target/MSIL/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_target(MSIL
- MSILWriter.cpp
- )
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
deleted file mode 100644
index cc350e8a4f89..000000000000
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ /dev/null
@@ -1,1706 +0,0 @@
-//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This library converts LLVM code to MSIL code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSILWriter.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/Analysis/ConstantsScanner.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/Passes.h"
-using namespace llvm;
-
-namespace llvm {
- // TargetMachine for the MSIL
- struct MSILTarget : public TargetMachine {
- MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
- : TargetMachine(T) {}
-
- virtual bool addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
-
- virtual const TargetData *getTargetData() const { return 0; }
- };
-}
-
-extern "C" void LLVMInitializeMSILTarget() {
- // Register the target.
- RegisterTargetMachine<MSILTarget> X(TheMSILTarget);
-}
-
-bool MSILModule::runOnModule(Module &M) {
- ModulePtr = &M;
- TD = &getAnalysis<TargetData>();
- bool Changed = false;
- // Find named types.
- TypeSymbolTable& Table = M.getTypeSymbolTable();
- std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
- for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
- if (!I->second->isStructTy() && !I->second->isOpaqueTy())
- Table.remove(I++);
- else {
- std::set<const Type *>::iterator T = Types.find(I->second);
- if (T==Types.end())
- Table.remove(I++);
- else {
- Types.erase(T);
- ++I;
- }
- }
- }
- // Find unnamed types.
- unsigned RenameCounter = 0;
- for (std::set<const Type *>::const_iterator I = Types.begin(),
- E = Types.end(); I!=E; ++I)
- if (const StructType *STy = dyn_cast<StructType>(*I)) {
- while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy))
- ++RenameCounter;
- Changed = true;
- }
- // Pointer for FunctionPass.
- UsedTypes = &getAnalysis<FindUsedTypes>().getTypes();
- return Changed;
-}
-
-char MSILModule::ID = 0;
-char MSILWriter::ID = 0;
-
-bool MSILWriter::runOnFunction(Function &F) {
- if (F.isDeclaration()) return false;
-
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
- return false;
-
- LInfo = &getAnalysis<LoopInfo>();
- printFunction(F);
- return false;
-}
-
-
-bool MSILWriter::doInitialization(Module &M) {
- ModulePtr = &M;
- Out << ".assembly extern mscorlib {}\n";
- Out << ".assembly MSIL {}\n\n";
- Out << "// External\n";
- printExternals();
- Out << "// Declarations\n";
- printDeclarations(M.getTypeSymbolTable());
- Out << "// Definitions\n";
- printGlobalVariables();
- Out << "// Startup code\n";
- printModuleStartup();
- return false;
-}
-
-
-bool MSILWriter::doFinalization(Module &M) {
- return false;
-}
-
-
-void MSILWriter::printModuleStartup() {
- Out <<
- ".method static public int32 $MSIL_Startup() {\n"
- "\t.entrypoint\n"
- "\t.locals (native int i)\n"
- "\t.locals (native int argc)\n"
- "\t.locals (native int ptr)\n"
- "\t.locals (void* argv)\n"
- "\t.locals (string[] args)\n"
- "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n"
- "\tdup\n"
- "\tstloc\targs\n"
- "\tldlen\n"
- "\tconv.i4\n"
- "\tdup\n"
- "\tstloc\targc\n";
- printPtrLoad(TD->getPointerSize());
- Out <<
- "\tmul\n"
- "\tlocalloc\n"
- "\tstloc\targv\n"
- "\tldc.i4.0\n"
- "\tstloc\ti\n"
- "L_01:\n"
- "\tldloc\ti\n"
- "\tldloc\targc\n"
- "\tceq\n"
- "\tbrtrue\tL_02\n"
- "\tldloc\targs\n"
- "\tldloc\ti\n"
- "\tldelem.ref\n"
- "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::"
- "StringToHGlobalAnsi(string)\n"
- "\tstloc\tptr\n"
- "\tldloc\targv\n"
- "\tldloc\ti\n";
- printPtrLoad(TD->getPointerSize());
- Out <<
- "\tmul\n"
- "\tadd\n"
- "\tldloc\tptr\n"
- "\tstind.i\n"
- "\tldloc\ti\n"
- "\tldc.i4.1\n"
- "\tadd\n"
- "\tstloc\ti\n"
- "\tbr\tL_01\n"
- "L_02:\n"
- "\tcall void $MSIL_Init()\n";
-
- // Call user 'main' function.
- const Function* F = ModulePtr->getFunction("main");
- if (!F || F->isDeclaration()) {
- Out << "\tldc.i4.0\n\tret\n}\n";
- return;
- }
- bool BadSig = true;
- std::string Args("");
- Function::const_arg_iterator Arg1,Arg2;
-
- switch (F->arg_size()) {
- case 0:
- BadSig = false;
- break;
- case 1:
- Arg1 = F->arg_begin();
- if (Arg1->getType()->isIntegerTy()) {
- Out << "\tldloc\targc\n";
- Args = getTypeName(Arg1->getType());
- BadSig = false;
- }
- break;
- case 2:
- Arg1 = Arg2 = F->arg_begin(); ++Arg2;
- if (Arg1->getType()->isIntegerTy() &&
- Arg2->getType()->getTypeID() == Type::PointerTyID) {
- Out << "\tldloc\targc\n\tldloc\targv\n";
- Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType());
- BadSig = false;
- }
- break;
- default:
- BadSig = true;
- }
-
- bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID);
- if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) {
- Out << "\tldc.i4.0\n";
- } else {
- Out << "\tcall\t" << getTypeName(F->getReturnType()) <<
- getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n";
- if (RetVoid)
- Out << "\tldc.i4.0\n";
- else
- Out << "\tconv.i4\n";
- }
- Out << "\tret\n}\n";
-}
-
-bool MSILWriter::isZeroValue(const Value* V) {
- if (const Constant *C = dyn_cast<Constant>(V))
- return C->isNullValue();
- return false;
-}
-
-
-std::string MSILWriter::getValueName(const Value* V) {
- std::string Name;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- Name = GV->getName();
- else {
- unsigned &No = AnonValueNumbers[V];
- if (No == 0) No = ++NextAnonValueNumber;
- Name = "tmp" + utostr(No);
- }
-
- // Name into the quotes allow control and space characters.
- return "'"+Name+"'";
-}
-
-
-std::string MSILWriter::getLabelName(const std::string& Name) {
- if (Name.find('.')!=std::string::npos) {
- std::string Tmp(Name);
- // Replace unaccepable characters in the label name.
- for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I)
- if (*I=='.') *I = '@';
- return Tmp;
- }
- return Name;
-}
-
-
-std::string MSILWriter::getLabelName(const Value* V) {
- std::string Name;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- Name = GV->getName();
- else {
- unsigned &No = AnonValueNumbers[V];
- if (No == 0) No = ++NextAnonValueNumber;
- Name = "tmp" + utostr(No);
- }
-
- return getLabelName(Name);
-}
-
-
-std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) {
- switch (CallingConvID) {
- case CallingConv::C:
- case CallingConv::Cold:
- case CallingConv::Fast:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) ";
- case CallingConv::X86_FastCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
- case CallingConv::X86_StdCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
- case CallingConv::X86_ThisCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) ";
- default:
- errs() << "CallingConvID = " << CallingConvID << '\n';
- llvm_unreachable("Unsupported calling convention");
- }
- return ""; // Not reached
-}
-
-
-std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) {
- std::string Tmp = "";
- const Type* ElemTy = Ty;
- assert(Ty->getTypeID()==TyID && "Invalid type passed");
- // Walk trought array element types.
- for (;;) {
- // Multidimensional array.
- if (ElemTy->getTypeID()==TyID) {
- if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy))
- Tmp += utostr(ATy->getNumElements());
- else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy))
- Tmp += utostr(VTy->getNumElements());
- ElemTy = cast<SequentialType>(ElemTy)->getElementType();
- }
- // Base element type found.
- if (ElemTy->getTypeID()!=TyID) break;
- Tmp += ",";
- }
- return getTypeName(ElemTy, false, true)+"["+Tmp+"]";
-}
-
-
-std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
- unsigned NumBits = 0;
- switch (Ty->getTypeID()) {
- case Type::VoidTyID:
- return "void ";
- case Type::IntegerTyID:
- NumBits = getBitWidth(Ty);
- if(NumBits==1)
- return "bool ";
- if (!isSigned)
- return "unsigned int"+utostr(NumBits)+" ";
- return "int"+utostr(NumBits)+" ";
- case Type::FloatTyID:
- return "float32 ";
- case Type::DoubleTyID:
- return "float64 ";
- default:
- errs() << "Type = " << *Ty << '\n';
- llvm_unreachable("Invalid primitive type");
- }
- return ""; // Not reached
-}
-
-
-std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
- bool isNested) {
- if (Ty->isPrimitiveType() || Ty->isIntegerTy())
- return getPrimitiveTypeName(Ty,isSigned);
- // FIXME: "OpaqueType" support
- switch (Ty->getTypeID()) {
- case Type::PointerTyID:
- return "void* ";
- case Type::StructTyID:
- if (isNested)
- return ModulePtr->getTypeName(Ty);
- return "valuetype '"+ModulePtr->getTypeName(Ty)+"' ";
- case Type::ArrayTyID:
- if (isNested)
- return getArrayTypeName(Ty->getTypeID(),Ty);
- return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
- case Type::VectorTyID:
- if (isNested)
- return getArrayTypeName(Ty->getTypeID(),Ty);
- return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
- default:
- errs() << "Type = " << *Ty << '\n';
- llvm_unreachable("Invalid type in getTypeName()");
- }
- return ""; // Not reached
-}
-
-
-MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) {
- // Function argument
- if (isa<Argument>(V))
- return ArgumentVT;
- // Function
- else if (const Function* F = dyn_cast<Function>(V))
- return F->hasLocalLinkage() ? InternalVT : GlobalVT;
- // Variable
- else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V))
- return G->hasLocalLinkage() ? InternalVT : GlobalVT;
- // Constant
- else if (isa<Constant>(V))
- return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT;
- // Local variable
- return LocalVT;
-}
-
-
-std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand,
- bool isSigned) {
- unsigned NumBits = 0;
- switch (Ty->getTypeID()) {
- // Integer constant, expanding for stack operations.
- case Type::IntegerTyID:
- NumBits = getBitWidth(Ty);
- // Expand integer value to "int32" or "int64".
- if (Expand) return (NumBits<=32 ? "i4" : "i8");
- if (NumBits==1) return "i1";
- return (isSigned ? "i" : "u")+utostr(NumBits/8);
- // Float constant.
- case Type::FloatTyID:
- return "r4";
- case Type::DoubleTyID:
- return "r8";
- case Type::PointerTyID:
- return "i"+utostr(TD->getTypeAllocSize(Ty));
- default:
- errs() << "TypeID = " << Ty->getTypeID() << '\n';
- llvm_unreachable("Invalid type in TypeToPostfix()");
- }
- return ""; // Not reached
-}
-
-
-void MSILWriter::printConvToPtr() {
- switch (ModulePtr->getPointerSize()) {
- case Module::Pointer32:
- printSimpleInstruction("conv.u4");
- break;
- case Module::Pointer64:
- printSimpleInstruction("conv.u8");
- break;
- default:
- llvm_unreachable("Module use not supporting pointer size");
- }
-}
-
-
-void MSILWriter::printPtrLoad(uint64_t N) {
- switch (ModulePtr->getPointerSize()) {
- case Module::Pointer32:
- printSimpleInstruction("ldc.i4",utostr(N).c_str());
- // FIXME: Need overflow test?
- if (!isUInt<32>(N)) {
- errs() << "Value = " << utostr(N) << '\n';
- llvm_unreachable("32-bit pointer overflowed");
- }
- break;
- case Module::Pointer64:
- printSimpleInstruction("ldc.i8",utostr(N).c_str());
- break;
- default:
- llvm_unreachable("Module use not supporting pointer size");
- }
-}
-
-
-void MSILWriter::printValuePtrLoad(const Value* V) {
- printValueLoad(V);
- printConvToPtr();
-}
-
-
-void MSILWriter::printConstLoad(const Constant* C) {
- if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) {
- // Integer constant
- Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t';
- if (CInt->isMinValue(true))
- Out << CInt->getSExtValue();
- else
- Out << CInt->getZExtValue();
- } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) {
- // Float constant
- uint64_t X;
- unsigned Size;
- if (FP->getType()->getTypeID()==Type::FloatTyID) {
- X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue();
- Size = 4;
- } else {
- X = FP->getValueAPF().bitcastToAPInt().getZExtValue();
- Size = 8;
- }
- Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')';
- } else if (isa<UndefValue>(C)) {
- // Undefined constant value = NULL.
- printPtrLoad(0);
- } else {
- errs() << "Constant = " << *C << '\n';
- llvm_unreachable("Invalid constant value");
- }
- Out << '\n';
-}
-
-
-void MSILWriter::printValueLoad(const Value* V) {
- MSILWriter::ValueType Location = getValueLocation(V);
- switch (Location) {
- // Global variable or function address.
- case GlobalVT:
- case InternalVT:
- if (const Function* F = dyn_cast<Function>(V)) {
- std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
- printSimpleInstruction("ldftn",
- getCallSignature(F->getFunctionType(),NULL,Name).c_str());
- } else {
- std::string Tmp;
- const Type* ElemTy = cast<PointerType>(V->getType())->getElementType();
- if (Location==GlobalVT && cast<GlobalVariable>(V)->hasDLLImportLinkage()) {
- Tmp = "void* "+getValueName(V);
- printSimpleInstruction("ldsfld",Tmp.c_str());
- } else {
- Tmp = getTypeName(ElemTy)+getValueName(V);
- printSimpleInstruction("ldsflda",Tmp.c_str());
- }
- }
- break;
- // Function argument.
- case ArgumentVT:
- printSimpleInstruction("ldarg",getValueName(V).c_str());
- break;
- // Local function variable.
- case LocalVT:
- printSimpleInstruction("ldloc",getValueName(V).c_str());
- break;
- // Constant value.
- case ConstVT:
- if (isa<ConstantPointerNull>(V))
- printPtrLoad(0);
- else
- printConstLoad(cast<Constant>(V));
- break;
- // Constant expression.
- case ConstExprVT:
- printConstantExpr(cast<ConstantExpr>(V));
- break;
- default:
- errs() << "Value = " << *V << '\n';
- llvm_unreachable("Invalid value location");
- }
-}
-
-
-void MSILWriter::printValueSave(const Value* V) {
- switch (getValueLocation(V)) {
- case ArgumentVT:
- printSimpleInstruction("starg",getValueName(V).c_str());
- break;
- case LocalVT:
- printSimpleInstruction("stloc",getValueName(V).c_str());
- break;
- default:
- errs() << "Value = " << *V << '\n';
- llvm_unreachable("Invalid value location");
- }
-}
-
-
-void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left,
- const Value* Right) {
- printValueLoad(Left);
- printValueLoad(Right);
- Out << '\t' << Name << '\n';
-}
-
-
-void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) {
- if(Operand)
- Out << '\t' << Inst << '\t' << Operand << '\n';
- else
- Out << '\t' << Inst << '\n';
-}
-
-
-void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) {
- for (BasicBlock::const_iterator I = Dst->begin(); isa<PHINode>(I); ++I) {
- const PHINode* Phi = cast<PHINode>(I);
- const Value* Val = Phi->getIncomingValueForBlock(Src);
- if (isa<UndefValue>(Val)) continue;
- printValueLoad(Val);
- printValueSave(Phi);
- }
-}
-
-
-void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB,
- const BasicBlock* TrueBB,
- const BasicBlock* FalseBB) {
- if (TrueBB==FalseBB) {
- // "TrueBB" and "FalseBB" destination equals
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("pop");
- printSimpleInstruction("br",getLabelName(TrueBB).c_str());
- } else if (FalseBB==NULL) {
- // If "FalseBB" not used the jump have condition
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
- } else if (TrueBB==NULL) {
- // If "TrueBB" not used the jump is unconditional
- printPHICopy(CurrBB,FalseBB);
- printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- } else {
- // Copy PHI instructions for each block
- std::string TmpLabel;
- // Print PHI instructions for "TrueBB"
- if (isa<PHINode>(TrueBB->begin())) {
- TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID());
- printSimpleInstruction("brtrue",TmpLabel.c_str());
- } else {
- printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
- }
- // Print PHI instructions for "FalseBB"
- if (isa<PHINode>(FalseBB->begin())) {
- printPHICopy(CurrBB,FalseBB);
- printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- } else {
- printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- }
- if (isa<PHINode>(TrueBB->begin())) {
- // Handle "TrueBB" PHI Copy
- Out << TmpLabel << ":\n";
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("br",getLabelName(TrueBB).c_str());
- }
- }
-}
-
-
-void MSILWriter::printBranchInstruction(const BranchInst* Inst) {
- if (Inst->isUnconditional()) {
- printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0));
- } else {
- printValueLoad(Inst->getCondition());
- printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0),
- Inst->getSuccessor(1));
- }
-}
-
-
-void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue,
- const Value* VFalse) {
- std::string TmpLabel = std::string("select$true_")+utostr(getUniqID());
- printValueLoad(VTrue);
- printValueLoad(Cond);
- printSimpleInstruction("brtrue",TmpLabel.c_str());
- printSimpleInstruction("pop");
- printValueLoad(VFalse);
- Out << TmpLabel << ":\n";
-}
-
-
-void MSILWriter::printIndirectLoad(const Value* V) {
- const Type* Ty = V->getType();
- printValueLoad(V);
- if (const PointerType* P = dyn_cast<PointerType>(Ty))
- Ty = P->getElementType();
- std::string Tmp = "ldind."+getTypePostfix(Ty, false);
- printSimpleInstruction(Tmp.c_str());
-}
-
-
-void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) {
- printValueLoad(Ptr);
- printValueLoad(Val);
- printIndirectSave(Val->getType());
-}
-
-
-void MSILWriter::printIndirectSave(const Type* Ty) {
- // Instruction need signed postfix for any type.
- std::string postfix = getTypePostfix(Ty, false);
- if (*postfix.begin()=='u') *postfix.begin() = 'i';
- postfix = "stind."+postfix;
- printSimpleInstruction(postfix.c_str());
-}
-
-
-void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty, const Type* SrcTy) {
- std::string Tmp("");
- printValueLoad(V);
- switch (Op) {
- // Signed
- case Instruction::SExt:
- // If sign extending int, convert first from unsigned to signed
- // with the same bit size - because otherwise we will loose the sign.
- if (SrcTy) {
- Tmp = "conv."+getTypePostfix(SrcTy,false,true);
- printSimpleInstruction(Tmp.c_str());
- }
- // FALLTHROUGH
- case Instruction::SIToFP:
- case Instruction::FPToSI:
- Tmp = "conv."+getTypePostfix(Ty,false,true);
- printSimpleInstruction(Tmp.c_str());
- break;
- // Unsigned
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::FPToUI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- Tmp = "conv."+getTypePostfix(Ty,false);
- printSimpleInstruction(Tmp.c_str());
- break;
- // Do nothing
- case Instruction::BitCast:
- // FIXME: meaning that ld*/st* instruction do not change data format.
- break;
- default:
- errs() << "Opcode = " << Op << '\n';
- llvm_unreachable("Invalid conversion instruction");
- }
-}
-
-
-void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I,
- gep_type_iterator E) {
- unsigned Size;
- // Load address
- printValuePtrLoad(V);
- // Calculate element offset.
- for (; I!=E; ++I){
- Size = 0;
- const Value* IndexValue = I.getOperand();
- if (const StructType* StrucTy = dyn_cast<StructType>(*I)) {
- uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue();
- // Offset is the sum of all previous structure fields.
- for (uint64_t F = 0; F<FieldIndex; ++F)
- Size += TD->getTypeAllocSize(StrucTy->getContainedType((unsigned)F));
- printPtrLoad(Size);
- printSimpleInstruction("add");
- continue;
- } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) {
- Size = TD->getTypeAllocSize(SeqTy->getElementType());
- } else {
- Size = TD->getTypeAllocSize(*I);
- }
- // Add offset of current element to stack top.
- if (!isZeroValue(IndexValue)) {
- // Constant optimization.
- if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) {
- if (C->getValue().isNegative()) {
- printPtrLoad(C->getValue().abs().getZExtValue()*Size);
- printSimpleInstruction("sub");
- continue;
- } else
- printPtrLoad(C->getZExtValue()*Size);
- } else {
- printPtrLoad(Size);
- printValuePtrLoad(IndexValue);
- printSimpleInstruction("mul");
- }
- printSimpleInstruction("add");
- }
- }
-}
-
-
-std::string MSILWriter::getCallSignature(const FunctionType* Ty,
- const Instruction* Inst,
- std::string Name) {
- std::string Tmp("");
- if (Ty->isVarArg()) Tmp += "vararg ";
- // Name and return type.
- Tmp += getTypeName(Ty->getReturnType())+Name+"(";
- // Function argument type list.
- unsigned NumParams = Ty->getNumParams();
- for (unsigned I = 0; I!=NumParams; ++I) {
- if (I!=0) Tmp += ",";
- Tmp += getTypeName(Ty->getParamType(I));
- }
- // CLR needs to know the exact amount of parameters received by vararg
- // function, because caller cleans the stack.
- if (Ty->isVarArg() && Inst) {
- // Origin to function arguments in "CallInst" or "InvokeInst".
- unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1;
- // Print variable argument types.
- unsigned NumOperands = Inst->getNumOperands()-Org;
- if (NumParams<NumOperands) {
- if (NumParams!=0) Tmp += ", ";
- Tmp += "... , ";
- for (unsigned J = NumParams; J!=NumOperands; ++J) {
- if (J!=NumParams) Tmp += ", ";
- Tmp += getTypeName(Inst->getOperand(J+Org)->getType());
- }
- }
- }
- return Tmp+")";
-}
-
-
-void MSILWriter::printFunctionCall(const Value* FnVal,
- const Instruction* Inst) {
- // Get function calling convention.
- std::string Name = "";
- if (const CallInst* Call = dyn_cast<CallInst>(Inst))
- Name = getConvModopt(Call->getCallingConv());
- else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
- Name = getConvModopt(Invoke->getCallingConv());
- else {
- errs() << "Instruction = " << Inst->getName() << '\n';
- llvm_unreachable("Need \"Invoke\" or \"Call\" instruction only");
- }
- if (const Function* F = dyn_cast<Function>(FnVal)) {
- // Direct call.
- Name += getValueName(F);
- printSimpleInstruction("call",
- getCallSignature(F->getFunctionType(),Inst,Name).c_str());
- } else {
- // Indirect function call.
- const PointerType* PTy = cast<PointerType>(FnVal->getType());
- const FunctionType* FTy = cast<FunctionType>(PTy->getElementType());
- // Load function address.
- printValueLoad(FnVal);
- printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str());
- }
-}
-
-
-void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
- std::string Name;
- switch (Inst->getIntrinsicID()) {
- case Intrinsic::vastart:
- Name = getValueName(Inst->getArgOperand(0));
- Name.insert(Name.length()-1,"$valist");
- // Obtain the argument handle.
- printSimpleInstruction("ldloca",Name.c_str());
- printSimpleInstruction("arglist");
- printSimpleInstruction("call",
- "instance void [mscorlib]System.ArgIterator::.ctor"
- "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
- // Save as pointer type "void*"
- printValueLoad(Inst->getArgOperand(0));
- printSimpleInstruction("ldloca",Name.c_str());
- printIndirectSave(PointerType::getUnqual(
- IntegerType::get(Inst->getContext(), 8)));
- break;
- case Intrinsic::vaend:
- // Close argument list handle.
- printIndirectLoad(Inst->getArgOperand(0));
- printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
- break;
- case Intrinsic::vacopy:
- // Copy "ArgIterator" valuetype.
- printIndirectLoad(Inst->getArgOperand(0));
- printIndirectLoad(Inst->getArgOperand(1));
- printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
- break;
- default:
- errs() << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
- llvm_unreachable("Invalid intrinsic function");
- }
-}
-
-
-void MSILWriter::printCallInstruction(const Instruction* Inst) {
- if (isa<IntrinsicInst>(Inst)) {
- // Handle intrinsic function.
- printIntrinsicCall(cast<IntrinsicInst>(Inst));
- } else {
- const CallInst *CI = cast<CallInst>(Inst);
- // Load arguments to stack and call function.
- for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I)
- printValueLoad(CI->getArgOperand(I));
- printFunctionCall(CI->getCalledFunction(), Inst);
- }
-}
-
-
-void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right) {
- switch (Predicate) {
- case ICmpInst::ICMP_EQ:
- printBinaryInstruction("ceq",Left,Right);
- break;
- case ICmpInst::ICMP_NE:
- // Emulate = not neg (Op1 eq Op2)
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("neg");
- printSimpleInstruction("not");
- break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE:
- // Emulate = (Op1 eq Op2) or (Op1 lt Op2)
- printBinaryInstruction("ceq",Left,Right);
- if (Predicate==ICmpInst::ICMP_ULE)
- printBinaryInstruction("clt.un",Left,Right);
- else
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- break;
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- // Emulate = (Op1 eq Op2) or (Op1 gt Op2)
- printBinaryInstruction("ceq",Left,Right);
- if (Predicate==ICmpInst::ICMP_UGE)
- printBinaryInstruction("cgt.un",Left,Right);
- else
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- break;
- case ICmpInst::ICMP_ULT:
- printBinaryInstruction("clt.un",Left,Right);
- break;
- case ICmpInst::ICMP_SLT:
- printBinaryInstruction("clt",Left,Right);
- break;
- case ICmpInst::ICMP_UGT:
- printBinaryInstruction("cgt.un",Left,Right);
- break;
- case ICmpInst::ICMP_SGT:
- printBinaryInstruction("cgt",Left,Right);
- break;
- default:
- errs() << "Predicate = " << Predicate << '\n';
- llvm_unreachable("Invalid icmp predicate");
- }
-}
-
-
-void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right) {
- // FIXME: Correct comparison
- std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)";
- switch (Predicate) {
- case FCmpInst::FCMP_UGT:
- // X > Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("cgt",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OGT:
- // X > Y
- printBinaryInstruction("cgt",Left,Right);
- break;
- case FCmpInst::FCMP_UGE:
- // X >= Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OGE:
- // X >= Y
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_ULT:
- // X < Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("clt",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OLT:
- // X < Y
- printBinaryInstruction("clt",Left,Right);
- break;
- case FCmpInst::FCMP_ULE:
- // X <= Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OLE:
- // X <= Y
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_UEQ:
- // X == Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OEQ:
- // X == Y
- printBinaryInstruction("ceq",Left,Right);
- break;
- case FCmpInst::FCMP_UNE:
- // X != Y
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("neg");
- printSimpleInstruction("not");
- break;
- case FCmpInst::FCMP_ONE:
- // X != Y && llvm_fcmp_ord(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("not");
- break;
- case FCmpInst::FCMP_ORD:
- // return X == X && Y == Y
- printBinaryInstruction("ceq",Left,Left);
- printBinaryInstruction("ceq",Right,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_UNO:
- // X != X || Y != Y
- printBinaryInstruction("ceq",Left,Left);
- printSimpleInstruction("not");
- printBinaryInstruction("ceq",Right,Right);
- printSimpleInstruction("not");
- printSimpleInstruction("or");
- break;
- default:
- llvm_unreachable("Illegal FCmp predicate");
- }
-}
-
-
-void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
- std::string Label = "leave$normal_"+utostr(getUniqID());
- Out << ".try {\n";
- // Load arguments
- for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I)
- printValueLoad(Inst->getArgOperand(I));
- // Print call instruction
- printFunctionCall(Inst->getOperand(0),Inst);
- // Save function result and leave "try" block
- printValueSave(Inst);
- printSimpleInstruction("leave",Label.c_str());
- Out << "}\n";
- Out << "catch [mscorlib]System.Exception {\n";
- // Redirect to unwind block
- printSimpleInstruction("pop");
- printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest());
- Out << "}\n" << Label << ":\n";
- // Redirect to continue block
- printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest());
-}
-
-
-void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) {
- // FIXME: Emulate with IL "switch" instruction
- // Emulate = if () else if () else if () else ...
- for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) {
- printValueLoad(Inst->getCondition());
- printValueLoad(Inst->getCaseValue(I));
- printSimpleInstruction("ceq");
- // Condition jump to successor block
- printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL);
- }
- // Jump to default block
- printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest());
-}
-
-
-void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) {
- printIndirectLoad(Inst->getOperand(0));
- printSimpleInstruction("call",
- "instance typedref [mscorlib]System.ArgIterator::GetNextArg()");
- printSimpleInstruction("refanyval","void*");
- std::string Name =
- "ldind."+getTypePostfix(PointerType::getUnqual(
- IntegerType::get(Inst->getContext(), 8)),false);
- printSimpleInstruction(Name.c_str());
-}
-
-
-void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) {
- uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType());
- // Constant optimization.
- if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
- printPtrLoad(CInt->getZExtValue()*Size);
- } else {
- printPtrLoad(Size);
- printValueLoad(Inst->getOperand(0));
- printSimpleInstruction("mul");
- }
- printSimpleInstruction("localloc");
-}
-
-
-void MSILWriter::printInstruction(const Instruction* Inst) {
- const Value *Left = 0, *Right = 0;
- if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0);
- if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1);
- // Print instruction
- // FIXME: "ShuffleVector","ExtractElement","InsertElement" support.
- switch (Inst->getOpcode()) {
- // Terminator
- case Instruction::Ret:
- if (Inst->getNumOperands()) {
- printValueLoad(Left);
- printSimpleInstruction("ret");
- } else
- printSimpleInstruction("ret");
- break;
- case Instruction::Br:
- printBranchInstruction(cast<BranchInst>(Inst));
- break;
- // Binary
- case Instruction::Add:
- case Instruction::FAdd:
- printBinaryInstruction("add",Left,Right);
- break;
- case Instruction::Sub:
- case Instruction::FSub:
- printBinaryInstruction("sub",Left,Right);
- break;
- case Instruction::Mul:
- case Instruction::FMul:
- printBinaryInstruction("mul",Left,Right);
- break;
- case Instruction::UDiv:
- printBinaryInstruction("div.un",Left,Right);
- break;
- case Instruction::SDiv:
- case Instruction::FDiv:
- printBinaryInstruction("div",Left,Right);
- break;
- case Instruction::URem:
- printBinaryInstruction("rem.un",Left,Right);
- break;
- case Instruction::SRem:
- case Instruction::FRem:
- printBinaryInstruction("rem",Left,Right);
- break;
- // Binary Condition
- case Instruction::ICmp:
- printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right);
- break;
- case Instruction::FCmp:
- printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right);
- break;
- // Bitwise Binary
- case Instruction::And:
- printBinaryInstruction("and",Left,Right);
- break;
- case Instruction::Or:
- printBinaryInstruction("or",Left,Right);
- break;
- case Instruction::Xor:
- printBinaryInstruction("xor",Left,Right);
- break;
- case Instruction::Shl:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shl");
- break;
- case Instruction::LShr:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shr.un");
- break;
- case Instruction::AShr:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shr");
- break;
- case Instruction::Select:
- printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2));
- break;
- case Instruction::Load:
- printIndirectLoad(Inst->getOperand(0));
- break;
- case Instruction::Store:
- printIndirectSave(Inst->getOperand(1), Inst->getOperand(0));
- break;
- case Instruction::SExt:
- printCastInstruction(Inst->getOpcode(),Left,
- cast<CastInst>(Inst)->getDestTy(),
- cast<CastInst>(Inst)->getSrcTy());
- break;
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- printCastInstruction(Inst->getOpcode(),Left,
- cast<CastInst>(Inst)->getDestTy());
- break;
- case Instruction::GetElementPtr:
- printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst),
- gep_type_end(Inst));
- break;
- case Instruction::Call:
- printCallInstruction(cast<CallInst>(Inst));
- break;
- case Instruction::Invoke:
- printInvokeInstruction(cast<InvokeInst>(Inst));
- break;
- case Instruction::Unwind:
- printSimpleInstruction("newobj",
- "instance void [mscorlib]System.Exception::.ctor()");
- printSimpleInstruction("throw");
- break;
- case Instruction::Switch:
- printSwitchInstruction(cast<SwitchInst>(Inst));
- break;
- case Instruction::Alloca:
- printAllocaInstruction(cast<AllocaInst>(Inst));
- break;
- case Instruction::Unreachable:
- printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
- printSimpleInstruction("newobj",
- "instance void [mscorlib]System.Exception::.ctor(string)");
- printSimpleInstruction("throw");
- break;
- case Instruction::VAArg:
- printVAArgInstruction(cast<VAArgInst>(Inst));
- break;
- default:
- errs() << "Instruction = " << Inst->getName() << '\n';
- llvm_unreachable("Unsupported instruction");
- }
-}
-
-
-void MSILWriter::printLoop(const Loop* L) {
- Out << getLabelName(L->getHeader()->getName()) << ":\n";
- const std::vector<BasicBlock*>& blocks = L->getBlocks();
- for (unsigned I = 0, E = blocks.size(); I!=E; I++) {
- BasicBlock* BB = blocks[I];
- Loop* BBLoop = LInfo->getLoopFor(BB);
- if (BBLoop == L)
- printBasicBlock(BB);
- else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L)
- printLoop(BBLoop);
- }
- printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str());
-}
-
-
-void MSILWriter::printBasicBlock(const BasicBlock* BB) {
- Out << getLabelName(BB) << ":\n";
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
- const Instruction* Inst = I;
- // Comment llvm original instruction
- // Out << "\n//" << *Inst << "\n";
- // Do not handle PHI instruction in current block
- if (Inst->getOpcode()==Instruction::PHI) continue;
- // Print instruction
- printInstruction(Inst);
- // Save result
- if (Inst->getType()!=Type::getVoidTy(BB->getContext())) {
- // Do not save value after invoke, it done in "try" block
- if (Inst->getOpcode()==Instruction::Invoke) continue;
- printValueSave(Inst);
- }
- }
-}
-
-
-void MSILWriter::printLocalVariables(const Function& F) {
- std::string Name;
- const Type* Ty = NULL;
- std::set<const Value*> Printed;
- const Value* VaList = NULL;
- unsigned StackDepth = 8;
- // Find local variables
- for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) {
- if (I->getOpcode()==Instruction::Call ||
- I->getOpcode()==Instruction::Invoke) {
- // Test stack depth.
- if (StackDepth<I->getNumOperands())
- StackDepth = I->getNumOperands();
- }
- const AllocaInst* AI = dyn_cast<AllocaInst>(&*I);
- if (AI && !isa<GlobalVariable>(AI)) {
- // Local variable allocation.
- Ty = PointerType::getUnqual(AI->getAllocatedType());
- Name = getValueName(AI);
- Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
- } else if (I->getType()!=Type::getVoidTy(F.getContext())) {
- // Operation result.
- Ty = I->getType();
- Name = getValueName(&*I);
- Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
- }
- // Test on 'va_list' variable
- bool isVaList = false;
- if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) {
- // "va_list" as "va_arg" instruction operand.
- isVaList = true;
- VaList = VaInst->getOperand(0);
- } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) {
- // "va_list" as intrinsic function operand.
- switch (Inst->getIntrinsicID()) {
- case Intrinsic::vastart:
- case Intrinsic::vaend:
- case Intrinsic::vacopy:
- isVaList = true;
- VaList = Inst->getArgOperand(0);
- break;
- default:
- isVaList = false;
- }
- }
- // Print "va_list" variable.
- if (isVaList && Printed.insert(VaList).second) {
- Name = getValueName(VaList);
- Name.insert(Name.length()-1,"$valist");
- Out << "\t.locals (valuetype [mscorlib]System.ArgIterator "
- << Name << ")\n";
- }
- }
- printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str());
-}
-
-
-void MSILWriter::printFunctionBody(const Function& F) {
- // Print body
- for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) {
- if (Loop *L = LInfo->getLoopFor(I)) {
- if (L->getHeader()==I && L->getParentLoop()==0)
- printLoop(L);
- } else {
- printBasicBlock(I);
- }
- }
-}
-
-
-void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
- const Value *left = 0, *right = 0;
- if (CE->getNumOperands()>=1) left = CE->getOperand(0);
- if (CE->getNumOperands()>=2) right = CE->getOperand(1);
- // Print instruction
- switch (CE->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- printCastInstruction(CE->getOpcode(),left,CE->getType());
- break;
- case Instruction::GetElementPtr:
- printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE));
- break;
- case Instruction::ICmp:
- printICmpInstruction(CE->getPredicate(),left,right);
- break;
- case Instruction::FCmp:
- printFCmpInstruction(CE->getPredicate(),left,right);
- break;
- case Instruction::Select:
- printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
- break;
- case Instruction::Add:
- case Instruction::FAdd:
- printBinaryInstruction("add",left,right);
- break;
- case Instruction::Sub:
- case Instruction::FSub:
- printBinaryInstruction("sub",left,right);
- break;
- case Instruction::Mul:
- case Instruction::FMul:
- printBinaryInstruction("mul",left,right);
- break;
- case Instruction::UDiv:
- printBinaryInstruction("div.un",left,right);
- break;
- case Instruction::SDiv:
- case Instruction::FDiv:
- printBinaryInstruction("div",left,right);
- break;
- case Instruction::URem:
- printBinaryInstruction("rem.un",left,right);
- break;
- case Instruction::SRem:
- case Instruction::FRem:
- printBinaryInstruction("rem",left,right);
- break;
- case Instruction::And:
- printBinaryInstruction("and",left,right);
- break;
- case Instruction::Or:
- printBinaryInstruction("or",left,right);
- break;
- case Instruction::Xor:
- printBinaryInstruction("xor",left,right);
- break;
- case Instruction::Shl:
- printBinaryInstruction("shl",left,right);
- break;
- case Instruction::LShr:
- printBinaryInstruction("shr.un",left,right);
- break;
- case Instruction::AShr:
- printBinaryInstruction("shr",left,right);
- break;
- default:
- errs() << "Expression = " << *CE << "\n";
- llvm_unreachable("Invalid constant expression");
- }
-}
-
-
-void MSILWriter::printStaticInitializerList() {
- // List of global variables with uninitialized fields.
- for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator
- VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE;
- ++VarI) {
- const std::vector<StaticInitializer>& InitList = VarI->second;
- if (InitList.empty()) continue;
- // For each uninitialized field.
- for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(),
- E = InitList.end(); I!=E; ++I) {
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) {
- // Out << "\n// Init " << getValueName(VarI->first) << ", offset " <<
- // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n";
- // Load variable address
- printValueLoad(VarI->first);
- // Add offset
- if (I->offset!=0) {
- printPtrLoad(I->offset);
- printSimpleInstruction("add");
- }
- // Load value
- printConstantExpr(CE);
- // Save result at offset
- std::string postfix = getTypePostfix(CE->getType(),true);
- if (*postfix.begin()=='u') *postfix.begin() = 'i';
- postfix = "stind."+postfix;
- printSimpleInstruction(postfix.c_str());
- } else {
- errs() << "Constant = " << *I->constant << '\n';
- llvm_unreachable("Invalid static initializer");
- }
- }
- }
-}
-
-
-void MSILWriter::printFunction(const Function& F) {
- bool isSigned = F.paramHasAttr(0, Attribute::SExt);
- Out << "\n.method static ";
- Out << (F.hasLocalLinkage() ? "private " : "public ");
- if (F.isVarArg()) Out << "vararg ";
- Out << getTypeName(F.getReturnType(),isSigned) <<
- getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n';
- // Arguments
- Out << "\t(";
- unsigned ArgIdx = 1;
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E;
- ++I, ++ArgIdx) {
- isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt);
- if (I!=F.arg_begin()) Out << ", ";
- Out << getTypeName(I->getType(),isSigned) << getValueName(I);
- }
- Out << ") cil managed\n";
- // Body
- Out << "{\n";
- printLocalVariables(F);
- printFunctionBody(F);
- Out << "}\n";
-}
-
-
-void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
- std::string Name;
- std::set<const Type*> Printed;
- for (std::set<const Type*>::const_iterator
- UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
- const Type* Ty = *UI;
- if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy())
- Name = getTypeName(Ty, false, true);
- // Type with no need to declare.
- else continue;
- // Print not duplicated type
- if (Printed.insert(Ty).second) {
- Out << ".class value explicit ansi sealed '" << Name << "'";
- Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty);
- Out << " }\n\n";
- }
- }
-}
-
-
-unsigned int MSILWriter::getBitWidth(const Type* Ty) {
- unsigned int N = Ty->getPrimitiveSizeInBits();
- assert(N!=0 && "Invalid type in getBitWidth()");
- switch (N) {
- case 1:
- case 8:
- case 16:
- case 32:
- case 64:
- return N;
- default:
- errs() << "Bits = " << N << '\n';
- llvm_unreachable("Unsupported integer width");
- }
- return 0; // Not reached
-}
-
-
-void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
- uint64_t TySize = 0;
- const Type* Ty = C->getType();
- // Print zero initialized constant.
- if (isa<ConstantAggregateZero>(C) || C->isNullValue()) {
- TySize = TD->getTypeAllocSize(C->getType());
- Offset += TySize;
- Out << "int8 (0) [" << TySize << "]";
- return;
- }
- // Print constant initializer
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID: {
- TySize = TD->getTypeAllocSize(Ty);
- const ConstantInt* Int = cast<ConstantInt>(C);
- Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")";
- break;
- }
- case Type::FloatTyID:
- case Type::DoubleTyID: {
- TySize = TD->getTypeAllocSize(Ty);
- const ConstantFP* FP = cast<ConstantFP>(C);
- if (Ty->getTypeID() == Type::FloatTyID)
- Out << "int32 (" <<
- (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
- else
- Out << "int64 (" <<
- FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
- break;
- }
- case Type::ArrayTyID:
- case Type::VectorTyID:
- case Type::StructTyID:
- for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) {
- if (I!=0) Out << ",\n";
- printStaticConstant(cast<Constant>(C->getOperand(I)), Offset);
- }
- break;
- case Type::PointerTyID:
- TySize = TD->getTypeAllocSize(C->getType());
- // Initialize with global variable address
- if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
- std::string name = getValueName(G);
- Out << "&(" << name.insert(name.length()-1,"$data") << ")";
- } else {
- // Dynamic initialization
- if (!isa<ConstantPointerNull>(C) && !C->isNullValue())
- InitListPtr->push_back(StaticInitializer(C,Offset));
- // Null pointer initialization
- if (TySize==4) Out << "int32 (0)";
- else if (TySize==8) Out << "int64 (0)";
- else llvm_unreachable("Invalid pointer size");
- }
- break;
- default:
- errs() << "TypeID = " << Ty->getTypeID() << '\n';
- llvm_unreachable("Invalid type in printStaticConstant()");
- }
- // Increase offset.
- Offset += TySize;
-}
-
-
-void MSILWriter::printStaticInitializer(const Constant* C,
- const std::string& Name) {
- switch (C->getType()->getTypeID()) {
- case Type::IntegerTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- Out << getPrimitiveTypeName(C->getType(), false);
- break;
- case Type::ArrayTyID:
- case Type::VectorTyID:
- case Type::StructTyID:
- case Type::PointerTyID:
- Out << getTypeName(C->getType());
- break;
- default:
- errs() << "Type = " << *C << "\n";
- llvm_unreachable("Invalid constant type");
- }
- // Print initializer
- std::string label = Name;
- label.insert(label.length()-1,"$data");
- Out << Name << " at " << label << '\n';
- Out << ".data " << label << " = {\n";
- uint64_t offset = 0;
- printStaticConstant(C,offset);
- Out << "\n}\n\n";
-}
-
-
-void MSILWriter::printVariableDefinition(const GlobalVariable* G) {
- const Constant* C = G->getInitializer();
- if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
- InitListPtr = 0;
- else
- InitListPtr = &StaticInitList[G];
- printStaticInitializer(C,getValueName(G));
-}
-
-
-void MSILWriter::printGlobalVariables() {
- if (ModulePtr->global_empty()) return;
- Module::global_iterator I,E;
- for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) {
- // Variable definition
- Out << ".field static " << (I->isDeclaration() ? "public " :
- "private ");
- if (I->isDeclaration()) {
- Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n";
- } else
- printVariableDefinition(&*I);
- }
-}
-
-
-const char* MSILWriter::getLibraryName(const Function* F) {
- return getLibraryForSymbol(F->getName(), true, F->getCallingConv());
-}
-
-
-const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
- return getLibraryForSymbol(GV->getName(), false, CallingConv::C);
-}
-
-
-const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction,
- CallingConv::ID CallingConv) {
- // TODO: Read *.def file with function and libraries definitions.
- return "MSVCRT.DLL";
-}
-
-
-void MSILWriter::printExternals() {
- Module::const_iterator I,E;
- // Functions.
- for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) {
- // Skip intrisics
- if (I->isIntrinsic()) continue;
- if (I->isDeclaration()) {
- const Function* F = I;
- std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
- std::string Sig =
- getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name);
- Out << ".method static hidebysig pinvokeimpl(\""
- << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n";
- }
- }
- // External variables and static initialization.
- Out <<
- ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
- " native int LoadLibrary(string) preservesig {}\n"
- ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
- " native int GetProcAddress(native int, string) preservesig {}\n";
- Out <<
- ".method private static void* $MSIL_Import(string lib,string sym)\n"
- " managed cil\n{\n"
- "\tldarg\tlib\n"
- "\tcall\tnative int LoadLibrary(string)\n"
- "\tldarg\tsym\n"
- "\tcall\tnative int GetProcAddress(native int,string)\n"
- "\tdup\n"
- "\tbrtrue\tL_01\n"
- "\tldstr\t\"Can no import variable\"\n"
- "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n"
- "\tthrow\n"
- "L_01:\n"
- "\tret\n"
- "}\n\n"
- ".method static private void $MSIL_Init() managed cil\n{\n";
- printStaticInitializerList();
- // Foreach global variable.
- for (Module::global_iterator I = ModulePtr->global_begin(),
- E = ModulePtr->global_end(); I!=E; ++I) {
- if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue;
- // Use "LoadLibrary"/"GetProcAddress" to recive variable address.
- std::string Tmp = getTypeName(I->getType())+getValueName(&*I);
- printSimpleInstruction("ldsflda",Tmp.c_str());
- Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n";
- Out << "\tldstr\t\"" << I->getName() << "\"\n";
- printSimpleInstruction("call","void* $MSIL_Import(string,string)");
- printIndirectSave(I->getType());
- }
- printSimpleInstruction("ret");
- Out << "}\n\n";
-}
-
-
-//===----------------------------------------------------------------------===//
-// External Interface declaration
-//===----------------------------------------------------------------------===//
-
-bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify)
-{
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
- MSILWriter* Writer = new MSILWriter(o);
- PM.add(createGCLoweringPass());
- // FIXME: Handle switch through native IL instruction "switch"
- PM.add(createLowerSwitchPass());
- PM.add(createCFGSimplificationPass());
- PM.add(new MSILModule(Writer->UsedTypes,Writer->TD));
- PM.add(Writer);
- PM.add(createGCInfoDeleter());
- return false;
-}
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
deleted file mode 100644
index 92a3abe5c0a7..000000000000
--- a/lib/Target/MSIL/MSILWriter.h
+++ /dev/null
@@ -1,258 +0,0 @@
-//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the MSILWriter that is used by the MSIL.
-//
-//===----------------------------------------------------------------------===//
-#ifndef MSILWRITER_H
-#define MSILWRITER_H
-
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- extern Target TheMSILTarget;
-
- class MSILModule : public ModulePass {
- Module *ModulePtr;
- const std::set<const Type *>*& UsedTypes;
- const TargetData*& TD;
-
- public:
- static char ID;
- MSILModule(const std::set<const Type *>*& _UsedTypes,
- const TargetData*& _TD)
- : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<FindUsedTypes>();
- AU.addRequired<TargetData>();
- }
-
- virtual const char *getPassName() const {
- return "MSIL backend definitions";
- }
-
- virtual bool runOnModule(Module &M);
-
- };
-
- class MSILWriter : public FunctionPass {
- struct StaticInitializer {
- const Constant* constant;
- uint64_t offset;
-
- StaticInitializer()
- : constant(0), offset(0) {}
-
- StaticInitializer(const Constant* _constant, uint64_t _offset)
- : constant(_constant), offset(_offset) {}
- };
-
- uint64_t UniqID;
-
- uint64_t getUniqID() {
- return ++UniqID;
- }
-
- public:
- formatted_raw_ostream &Out;
- Module* ModulePtr;
- const TargetData* TD;
- LoopInfo *LInfo;
- std::vector<StaticInitializer>* InitListPtr;
- std::map<const GlobalVariable*,std::vector<StaticInitializer> >
- StaticInitList;
- const std::set<const Type *>* UsedTypes;
- static char ID;
- DenseMap<const Value*, unsigned> AnonValueNumbers;
- unsigned NextAnonValueNumber;
-
- MSILWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o),
- NextAnonValueNumber(0) {
- UniqID = 0;
- }
-
- enum ValueType {
- UndefVT,
- GlobalVT,
- InternalVT,
- ArgumentVT,
- LocalVT,
- ConstVT,
- ConstExprVT
- };
-
- bool isVariable(ValueType V) {
- return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT;
- }
-
- bool isConstValue(ValueType V) {
- return V==ConstVT || V==ConstExprVT;
- }
-
- virtual const char *getPassName() const { return "MSIL backend"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
- }
-
- bool runOnFunction(Function &F);
-
- virtual bool doInitialization(Module &M);
-
- virtual bool doFinalization(Module &M);
-
- void printModuleStartup();
-
- bool isZeroValue(const Value* V);
-
- std::string getValueName(const Value* V);
-
- std::string getLabelName(const Value* V);
-
- std::string getLabelName(const std::string& Name);
-
- std::string getConvModopt(CallingConv::ID CallingConvID);
-
- std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
-
- std::string getPrimitiveTypeName(const Type* Ty, bool isSigned);
-
- std::string getFunctionTypeName(const Type* Ty);
-
- std::string getPointerTypeName(const Type* Ty);
-
- std::string getTypeName(const Type* Ty, bool isSigned = false,
- bool isNested = false);
-
- ValueType getValueLocation(const Value* V);
-
- std::string getTypePostfix(const Type* Ty, bool Expand,
- bool isSigned = false);
-
- void printConvToPtr();
-
- void printPtrLoad(uint64_t N);
-
- void printValuePtrLoad(const Value* V);
-
- void printConstLoad(const Constant* C);
-
- void printValueLoad(const Value* V);
-
- void printValueSave(const Value* V);
-
- void printBinaryInstruction(const char* Name, const Value* Left,
- const Value* Right);
-
- void printSimpleInstruction(const char* Inst, const char* Operand = NULL);
-
- void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst);
-
- void printBranchToBlock(const BasicBlock* CurrBB,
- const BasicBlock* TrueBB,
- const BasicBlock* FalseBB);
-
- void printBranchInstruction(const BranchInst* Inst);
-
- void printSelectInstruction(const Value* Cond, const Value* VTrue,
- const Value* VFalse);
-
- void printIndirectLoad(const Value* V);
-
- void printIndirectSave(const Value* Ptr, const Value* Val);
-
- void printIndirectSave(const Type* Ty);
-
- void printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty, const Type* SrcTy=0);
-
- void printGepInstruction(const Value* V, gep_type_iterator I,
- gep_type_iterator E);
-
- std::string getCallSignature(const FunctionType* Ty,
- const Instruction* Inst,
- std::string Name);
-
- void printFunctionCall(const Value* FnVal, const Instruction* Inst);
-
- void printIntrinsicCall(const IntrinsicInst* Inst);
-
- void printCallInstruction(const Instruction* Inst);
-
- void printICmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right);
-
- void printFCmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right);
-
- void printInvokeInstruction(const InvokeInst* Inst);
-
- void printSwitchInstruction(const SwitchInst* Inst);
-
- void printVAArgInstruction(const VAArgInst* Inst);
-
- void printAllocaInstruction(const AllocaInst* Inst);
-
- void printInstruction(const Instruction* Inst);
-
- void printLoop(const Loop* L);
-
- void printBasicBlock(const BasicBlock* BB);
-
- void printLocalVariables(const Function& F);
-
- void printFunctionBody(const Function& F);
-
- void printConstantExpr(const ConstantExpr* CE);
-
- void printStaticInitializerList();
-
- void printFunction(const Function& F);
-
- void printDeclarations(const TypeSymbolTable& ST);
-
- unsigned int getBitWidth(const Type* Ty);
-
- void printStaticConstant(const Constant* C, uint64_t& Offset);
-
- void printStaticInitializer(const Constant* C, const std::string& Name);
-
- void printVariableDefinition(const GlobalVariable* G);
-
- void printGlobalVariables();
-
- const char* getLibraryName(const Function* F);
-
- const char* getLibraryName(const GlobalVariable* GV);
-
- const char* getLibraryForSymbol(StringRef Name, bool isFunction,
- CallingConv::ID CallingConv);
-
- void printExternals();
- };
-
-}
-
-#endif
-
diff --git a/lib/Target/MSIL/Makefile b/lib/Target/MSIL/Makefile
deleted file mode 100644
index 70eadb32e360..000000000000
--- a/lib/Target/MSIL/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMMSIL
-DIRS = TargetInfo
-
-include $(LEVEL)/Makefile.common
-
-CompileCommonOpts := $(CompileCommonOpts) -Wno-format
diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT
deleted file mode 100644
index d797c71fd39f..000000000000
--- a/lib/Target/MSIL/README.TXT
+++ /dev/null
@@ -1,26 +0,0 @@
-//===---------------------------------------------------------------------===//
-
-Vector instructions support.
-
-ShuffleVector
-ExtractElement
-InsertElement
-
-//===---------------------------------------------------------------------===//
-
-Add "OpaqueType" type.
-
-//===---------------------------------------------------------------------===//
-
-"switch" instruction emulation with CLI "switch" instruction.
-
-//===---------------------------------------------------------------------===//
-
-Write linker for external function, because function export need to know
-dynamic library where function located.
-
-.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl)
- void free(void*) preservesig {}
-
-
-
diff --git a/lib/Target/MSIL/TargetInfo/CMakeLists.txt b/lib/Target/MSIL/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 9f0c3a09341a..000000000000
--- a/lib/Target/MSIL/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMSILInfo
- MSILTargetInfo.cpp
- )
-
diff --git a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
deleted file mode 100644
index dfd42814e51c..000000000000
--- a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- MSILTargetInfo.cpp - MSIL Target Implementation -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSILWriter.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheMSILTarget;
-
-static unsigned MSIL_TripleMatchQuality(const std::string &TT) {
- // This class always works, but shouldn't be the default in most cases.
- return 1;
-}
-
-extern "C" void LLVMInitializeMSILTargetInfo() {
- TargetRegistry::RegisterTarget(TheMSILTarget, "msil",
- "MSIL backend",
- &MSIL_TripleMatchQuality);
-}
diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile
deleted file mode 100644
index 30b0950db0f7..000000000000
--- a/lib/Target/MSIL/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/MSIL/TargetInfo/Makefile -----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMSILInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 68cb342b08f4..bd644435c76f 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -10,7 +10,7 @@
// This file contains a pass that scans a machine function to determine which
// conditional branches need more than 10 bits of displacement to reach their
// target basic block. It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass. This
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
// pass should be run last, just before the assembly printer.
//
//===----------------------------------------------------------------------===//
@@ -30,7 +30,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
struct MSP430BSel : public MachineFunctionPass {
static char ID;
- MSP430BSel() : MachineFunctionPass(&ID) {}
+ MSP430BSel() : MachineFunctionPass(ID) {}
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -52,7 +52,8 @@ FunctionPass *llvm::createMSP430BranchSelectionPass() {
}
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const MSP430InstrInfo *TII =
+ static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index df28d07f5d71..bfab844f5b1a 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -100,27 +100,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
bool
-MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers yet.
-
- switch (MI.getOpcode()) {
- default:
- return false;
- case MSP430::MOV8rr:
- case MSP430::MOV16rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
-}
-
-bool
MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -361,7 +340,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (Desc.getOpcode()) {
default:
assert(0 && "Unknown instruction size!");
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index ebbda1aeef51..49ccc032bf29 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -54,10 +54,6 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 608ca49fcf78..3c3fa73477a5 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -101,7 +101,7 @@ bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
MFI->isFrameAddressTaken());
}
-bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
@@ -163,10 +163,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -204,7 +203,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(BasePtr, false);
if (Offset == 0)
- return 0;
+ return;
// We need to materialize the offset via add instruction.
unsigned DstReg = MI.getOperand(0).getReg();
@@ -215,12 +214,11 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
.addReg(DstReg).addImm(Offset);
- return 0;
+ return;
}
MI.getOperand(i).ChangeToRegister(BasePtr, false);
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
}
void
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 6e58d3116d27..4d2795bb4020 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -40,15 +40,14 @@ public:
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 2037a9114559..49efe75d79d8 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -180,7 +180,8 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
PrefixTy = Mangler::Private;
- else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
+ GV->hasLinkerPrivateWeakDefAutoLinkage())
PrefixTy = Mangler::LinkerPrivate;
// If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 8ae05b75e919..6660f6b62430 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -18,6 +18,8 @@
#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "MipsMachineFunction.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -75,6 +77,7 @@ namespace {
}
virtual void EmitFunctionBodyStart();
virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
static const char *getRegisterName(unsigned RegNo);
virtual void EmitFunctionEntryLabel();
@@ -227,6 +230,23 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
}
+/// isBlockOnlyReachableByFallthough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const {
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+ // If the predecessor is a switch statement, assume a jump table
+ // implementation, so it is not a fall through.
+ if (const BasicBlock *bb = Pred->getBasicBlock())
+ if (isa<SwitchInst>(bb->getTerminator()))
+ return false;
+
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode,
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index aa036aef83d0..a51c3779c7f4 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -1,4 +1,4 @@
-//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===//
+//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index c2bfb8fa738c..8f313efaf8da 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,4 +1,4 @@
-//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index a2b615d8add2..597ea0d6c207 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "Mips Delay Slot Filler";
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 3888bbf09ec7..a47cf7b4f201 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -137,7 +137,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base)
// Operand is a result from an ADD.
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_immSExt16(CN)) {
+ if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
@@ -184,8 +184,9 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
if (!Subtarget.isMips1() || NVT != MVT::f64)
return NULL;
- if (!Predicate_unindexedload(N) ||
- !Predicate_load(N))
+ LoadSDNode *LN = cast<LoadSDNode>(N);
+ if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
+ LN->getAddressingMode() != ISD::UNINDEXED)
return NULL;
SDValue Chain = N->getOperand(0);
@@ -248,8 +249,8 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
SDValue Chain = N->getOperand(0);
- if (!Predicate_unindexedstore(N) ||
- !Predicate_store(N))
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
return NULL;
SDValue N1 = N->getOperand(1);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b6ff2c371d5c..b0b99bad1607 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -317,13 +317,13 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(sinkMBB);
// sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
// ...
BB = sinkMBB;
BuildMI(*BB, BB->begin(), dl,
TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -542,7 +542,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
- if (IsPIC) {
+ if (!IsPIC) {
SDValue Ops[] = { JTI };
HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
} else // Emit Load from Global Pointer
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index e948917eb80e..cff79966dcd3 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===//
+//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 0853272f7280..98ae2fa7da45 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 6c09a3e10785..aaf307b1ce3f 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -30,53 +30,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool MipsInstrInfo::
-isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const
-{
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- // addu $dst, $src, $zero || addu $dst, $zero, $src
- // or $dst, $src, $zero || or $dst, $zero, $src
- if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) {
- if (MI.getOperand(1).getReg() == Mips::ZERO) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).getReg() == Mips::ZERO) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- // mov $fpDst, $fpSrc
- // mfc $gpDst, $fpSrc
- // mtc $fpDst, $gpSrc
- if (MI.getOpcode() == Mips::FMOV_S32 ||
- MI.getOpcode() == Mips::FMOV_D32 ||
- MI.getOpcode() == Mips::MFC1 ||
- MI.getOpcode() == Mips::MTC1 ||
- MI.getOpcode() == Mips::MOVCCRToCCR) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
-
- // addiu $dst, $src, 0
- if (MI.getOpcode() == Mips::ADDiu) {
- if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index d6f87f9b0ce8..52a3d39840ba 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -174,12 +174,6 @@ public:
///
virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 5337c9fb816a..320c5b883483 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.td - Mips Register defs --------------------*- C++ -*-===//
+//===- MipsInstrInfo.td - Mips Register defs ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -96,12 +96,7 @@ def HI16 : SDNodeXForm<imm, [{
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
-def immSExt16 : PatLeaf<(imm), [{
- if (N->getValueType(0) == MVT::i32)
- return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
- else
- return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
-}]>;
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index e15f0a58e501..69436d2acb54 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -327,10 +327,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// FrameIndex represent objects inside a abstract stack.
// We must replace FrameIndex with an stack/frame pointer
// direct reference.
-unsigned MipsRegisterInfo::
+void MipsRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const
-{
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
@@ -361,7 +360,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MI.getOperand(i-1).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
- return 0;
}
void MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b500a650f7cc..89282f8fa146 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -51,9 +51,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
MachineBasicBlock::iterator I) const;
/// Stack Frame Processing Methods
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index be78a2266268..60efe31fbaf8 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 616a79bf831c..055ff3237218 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -1,4 +1,4 @@
-//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===//
+//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
index cd4afe8e2342..2b6cb9e4e461 100644
--- a/lib/Target/PIC16/CMakeLists.txt
+++ b/lib/Target/PIC16/CMakeLists.txt
@@ -10,7 +10,7 @@ tablegen(PIC16GenDAGISel.inc -gen-dag-isel)
tablegen(PIC16GenCallingConv.inc -gen-callingconv)
tablegen(PIC16GenSubtarget.inc -gen-subtarget)
-add_llvm_target(PIC16
+add_llvm_target(PIC16CodeGen
PIC16DebugInfo.cpp
PIC16InstrInfo.cpp
PIC16ISelDAGToDAG.cpp
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
index cee55f4f260f..08bb3e6f055b 100644
--- a/lib/Target/PIC16/PIC16.h
+++ b/lib/Target/PIC16/PIC16.h
@@ -58,13 +58,10 @@ namespace PIC16CC {
ESNames() {}
public:
~ESNames() {
- std::vector<char*>::iterator it = stk.end();
- it--;
- while(stk.end() != stk.begin())
+ while (!stk.empty())
{
- char* p = *it;
+ char* p = stk.back();
delete [] p;
- it--;
stk.pop_back();
}
}
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 54a6a28992bf..527b31d0cc9f 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -312,6 +312,16 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
computeRegisterProperties();
}
+std::pair<const TargetRegisterClass*, uint8_t>
+PIC16TargetLowering::findRepresentativeClass(EVT VT) const {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i16:
+ return std::make_pair(PIC16::FSR16RegisterClass, 1);
+ }
+}
+
// getOutFlag - Extract the flag result if the Op has it.
static SDValue getOutFlag(SDValue &Op) {
// Flag is the last value of the node.
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index 0a7506cb497f..d942af46a9e9 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -50,7 +50,7 @@ namespace llvm {
CALL, // PIC16 Call instruction
CALLW, // PIC16 CALLW instruction
SUBCC, // Compare for equality or inequality.
- SELECT_ICC, // Psuedo to be caught in schedular and expanded to brcond.
+ SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond.
BRCOND, // Conditional branch.
RET, // Return.
Dummy
@@ -181,6 +181,9 @@ namespace llvm {
// FIXME: The function never seems to be aligned.
return 1;
}
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
private:
// If the Node is a BUILD_PAIR representing a direct Address,
// then this function will return true.
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index e784f746f7f9..81257f3c4108 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -167,21 +167,6 @@ void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DestReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- if (MI.getOpcode() == PIC16::copy_fsr
- || MI.getOpcode() == PIC16::copy_w) {
- DestReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
-
- return false;
-}
-
/// InsertBranch - Insert a branch into the end of the specified
/// MachineBasicBlock. This operands to this method are the same as those
/// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index a3a77f11ba16..661b335d3b6c 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -61,10 +61,6 @@ public:
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index 241170b11c2a..b6aa38f765ea 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
namespace {
struct MemSelOpt : public MachineFunctionPass {
static char ID;
- MemSelOpt() : MachineFunctionPass(&ID) {}
+ MemSelOpt() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(MachineLoopInfoID);
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
index 27f1cf572ae6..56f021157092 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -256,7 +256,7 @@ PIC16Cloner::cloneFunction(Function *OrgF) {
CloneAutos(OrgF);
// Now create the clone.
- ClonedF = CloneFunction(OrgF, VMap);
+ ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false);
// The new function should be for interrupt line. Therefore should have
// the name suffixed with IL and section attribute marked with IL.
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
index e8b5aa45cdca..e7d67ce09629 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -35,7 +35,7 @@ namespace llvm {
class PIC16Cloner : public ModulePass {
public:
static char ID; // Class identification
- PIC16Cloner() : ModulePass(&ID) {}
+ PIC16Cloner() : ModulePass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraph>();
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
index 5ecb6aa55157..0f8928a4b5f5 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
@@ -171,8 +171,9 @@ void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) {
for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E;
++I) {
- if ((!isa<CallInst>(I) && !isa<InvokeInst>(I))
- || !CallSite(cast<Instruction>(I)).isCallee(I)) {
+ User *U = *I;
+ if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ || !CallSite(cast<Instruction>(U)).isCallee(I)) {
setColor(MI, ++IndirectCallColor);
break;
}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
index 5a2551fabcda..2f611e65de1f 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
@@ -39,7 +39,7 @@ namespace llvm {
unsigned IndirectCallColor;
public:
static char ID; // Class identification
- PIC16Overlay() : ModulePass(&ID) {
+ PIC16Overlay() : ModulePass(ID) {
OverlayStr = "Overlay=";
InterruptDepth = PIC16OVERLAY::StartInterruptColor;
IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor;
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index dff98d12c2ae..76de47fdf0f4 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -44,13 +44,10 @@ bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
return false;
}
-unsigned PIC16RegisterInfo::
+void PIC16RegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const
-{
- /* NOT YET IMPLEMENTED */
- return 0;
-}
+ RegScavenger *RS) const
+{ /* NOT YET IMPLEMENTED */ }
void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
{ /* NOT YET IMPLEMENTED */ }
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 5536a617d2be..20052b003442 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -44,9 +44,8 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
virtual bool hasFP(const MachineFunction &MF) const;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS=NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS=NULL) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index e35dc579f2cd..c1a5663be931 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -327,6 +328,19 @@ namespace {
void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 52948c868b9c..e161d23600e2 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -10,7 +10,7 @@
// This file contains a pass that scans a machine function to determine which
// conditional branches need more than 16 bits of displacement to reach their
// target basic block. It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass. This
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
// pass should be run last, just before the assembly printer.
//
//===----------------------------------------------------------------------===//
@@ -31,7 +31,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(&ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {}
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -53,7 +53,8 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
}
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 155fba22d9d7..441db94581ae 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,4 +1,4 @@
-//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===//
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 361fa70fb4c4..df9ab52389ba 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -45,7 +45,7 @@ namespace {
public:
PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), TM(tm), MCE(mce) {}
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
@@ -110,7 +110,7 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
default:
MCE.emitWordBE(getBinaryCodeForInstr(MI));
break;
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d47d989b34c0..14d1b154a5c9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2467,18 +2467,31 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType());
- else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
- else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ bool needIndirectCall = true;
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
// If this is an absolute destination address, use the munged value.
Callee = SDValue(Dest, 0);
- else {
+ needIndirectCall = false;
+ }
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL functions calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType());
+ needIndirectCall = false;
+ }
+ }
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+ needIndirectCall = false;
+ }
+ if (needIndirectCall) {
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
@@ -3942,17 +3955,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+ if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+ if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+ if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 1574aa3fb23a..c17108fa9230 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,8 +18,11 @@
#include "PPCGenInstrInfo.inc"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -36,67 +39,6 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
RI(*TM.getSubtargetImpl(), *this) {}
-bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg,
- unsigned& destReg,
- unsigned& sourceSubIdx,
- unsigned& destSubIdx) const {
- sourceSubIdx = destSubIdx = 0; // No sub-registers.
-
- unsigned oc = MI.getOpcode();
- if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR ||
- oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid PPC OR instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::ADDI) { // addi r1, r2, 0
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid PPC ADDI instruction!");
- if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::ORI) { // ori r1, r2, 0
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid PPC ORI instruction!");
- if (MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid PPC FMR instruction");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- } else if (oc == PPC::MCRF) { // mcrf cr1, cr2
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid PPC MCRF instruction");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- return false;
-}
-
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -524,6 +466,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, /*Offset=*/0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
}
void
@@ -637,6 +587,14 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, /*Offset=*/0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
}
MachineInstr*
@@ -667,7 +625,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
- case PPC::DBG_LABEL:
+ case PPC::PROLOG_LABEL:
case PPC::EH_LABEL:
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index eadb21e21702..fc7b7b3cb897 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -82,12 +82,6 @@ public:
///
virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 63b4581a37f9..eb100ec75280 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1022,9 +1022,7 @@ let Uses = [RM] in {
}
}
-/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending.
-///
-/// Note that these are defined as pseudo-ops on the PPC970 because they are
+/// Note that FMR is defined as pseudo-ops on the PPC970 because they are
/// often coalesced away and we don't want the dispatch group builder to think
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
@@ -1032,10 +1030,6 @@ def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
[]>, // (set F4RC:$frD, F4RC:$frB)
PPC970_Unit_Pseudo;
-def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fextend F4RC:$frB))]>,
- PPC970_Unit_Pseudo;
let PPC970_Unit = 3 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
@@ -1476,10 +1470,13 @@ def : Pat<(extloadi16 iaddr:$src),
(LHZ iaddr:$src)>;
def : Pat<(extloadi16 xaddr:$src),
(LHZX xaddr:$src)>;
-def : Pat<(extloadf32 iaddr:$src),
- (FMRSD (LFS iaddr:$src))>;
-def : Pat<(extloadf32 xaddr:$src),
- (FMRSD (LFSX xaddr:$src))>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend F4RC:$src)),
+ (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
// Memory barriers
def : Pat<(membarrier (i32 imm /*ll*/),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4d6132a9ec50..653e143ba407 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -449,8 +449,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Get stack alignments.
unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
- assert(MaxAlign <= TargetAlign &&
- "Dynamic alloca with large aligns not supported");
+ if (MaxAlign > TargetAlign)
+ report_fatal_error("Dynamic alloca with large aligns not supported");
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
@@ -580,10 +580,9 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-unsigned
+void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -622,14 +621,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
lowerDynamicAlloc(II, SPAdj, RS);
- return 0;
+ return;
}
// Special case for pseudo-op SPILL_CR.
if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return 0;
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -674,7 +673,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
- return 0;
+ return;
}
// The offset doesn't fit into a single register, scavenge one to build the
@@ -710,11 +709,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else {
OperandBase = OffsetOperandNo;
}
-
+
unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
- return 0;
}
/// VRRegNo - Map from a numbered VR register to its enum value.
@@ -1318,7 +1316,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
// Show update of SP.
if (NegFrameSize) {
@@ -1361,7 +1359,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
ReadyLabel = MMI.getContext().CreateTempSymbol();
// Mark effective beginning of when frame pointer is ready.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(ReadyLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
(isPPC64 ? PPC::X1 : PPC::R1));
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index f026847a540b..890b24b9c0a8 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -63,9 +63,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 40914ba62a70..5d46065d96f2 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -69,6 +69,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
, HasFSQRT(false)
, HasSTFIWX(false)
, HasLazyResolverStubs(false)
+ , IsJITCodeModel(false)
, DarwinVers(0) {
// Determine default and user specified characteristics
@@ -117,6 +118,9 @@ void PPCSubtarget::SetJITMode() {
// everything is. This matters for PPC64, which codegens in PIC mode without
// stubs.
HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 75fcf6238a27..00ec7474c9e3 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -63,6 +63,7 @@ protected:
bool HasFSQRT;
bool HasSTFIWX;
bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
/// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -124,6 +125,9 @@ public:
bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4d7ee08de1de..4faf8bcfd419 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1919,5 +1919,21 @@ something like the following, which eliminates a branch:
ret
.LBB0_2:
jmp foo # TAILCALL
+//===---------------------------------------------------------------------===//
+Given a branch where the two target blocks are identical ("ret i32 %b" in
+both), simplifycfg will simplify them away. But not so for a switch statement:
+
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ switch i32 %a, label %bb3 [
+ i32 4, label %bb
+ i32 6, label %bb
+ ]
+bb: ; preds = %entry, %entry
+ ret i32 %b
+
+bb3: ; preds = %entry
+ ret i32 %b
+}
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 9e148ada8853..aae5da856005 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "SPARC Delay Slot Filler";
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 88b0927b3550..1423b1e64d66 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -36,7 +36,7 @@ namespace {
static char ID;
explicit FPMover(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm) { }
virtual const char *getPassName() const {
return "Sparc Double-FP Move Fixer";
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 925d782d988b..764336665d0b 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -1,4 +1,4 @@
-//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===//
+//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 698923e3c9e0..4ea94c4cb560 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -84,7 +84,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_simm13(CN)) {
+ if (isInt<13>(CN->getSExtValue())) {
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
// Constant offset from frame ref.
@@ -120,9 +120,9 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
- if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
- Predicate_simm13(Addr.getOperand(1).getNode()))
- return false; // Let the reg+imm pattern catch this!
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
Addr.getOperand(1).getOpcode() == SPISD::Lo)
return false; // Let the reg+imm pattern catch this!
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 3a4c80ad076a..7ede8e7ebbe4 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -28,46 +28,6 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
RI(ST, *this), Subtarget(ST) {
}
-static bool isZeroImm(const MachineOperand &op) {
- return op.isImm() && op.getImm() == 0;
-}
-
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSR, unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- // We look for 3 kinds of patterns here:
- // or with G0 or 0
- // add with G0 or 0
- // fmovs or FpMOVD (pseudo double move).
- if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) {
- if (MI.getOperand(1).getReg() == SP::G0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).getReg() == SP::G0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) &&
- isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD ||
- MI.getOpcode() == SP::FMOVD) {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 133471857bad..c00bd2198765 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -43,12 +43,6 @@ public:
///
virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index ddadd51a93a4..467ed48487ad 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -43,17 +43,9 @@ def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
-def simm11 : PatLeaf<(imm), [{
- // simm11 predicate - True if the imm fits in a 11-bit sign extended field.
- return (((int)N->getZExtValue() << (32-11)) >> (32-11)) ==
- (int)N->getZExtValue();
-}]>;
+def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
-def simm13 : PatLeaf<(imm), [{
- // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
- return (((int)N->getZExtValue() << (32-13)) >> (32-13)) ==
- (int)N->getZExtValue();
-}]>;
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
def LO10 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 427cc7fd4577..c85db20d2b74 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -69,10 +69,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -108,7 +107,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(SP::G1, false);
MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
}
- return 0;
}
void SparcRegisterInfo::
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 9f0cda707b3e..020ce567c956 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,9 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c03864fe41e4..367bed3a8539 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -141,31 +141,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool
-SystemZInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case SystemZ::MOV32rr:
- case SystemZ::MOV64rr:
- case SystemZ::MOV64rrP:
- case SystemZ::MOV128rr:
- case SystemZ::FMOV32rr:
- case SystemZ::FMOV64rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
-}
-
unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 0559619248a6..c248f2489c49 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -65,9 +65,6 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index ae96b0b08ff6..f8d3e6ac8a6f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -92,10 +92,9 @@ int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
return Offset;
}
-unsigned
+void
SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unxpected");
unsigned i = 0;
@@ -117,13 +116,13 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Offset is a either 12-bit unsigned or 20-bit signed integer.
// FIXME: handle "too long" displacements.
- int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+ int Offset =
+ getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
// Check whether displacement is too long to fit into 12 bit zext field.
MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
}
void
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 670025f86e08..5dae865cb79a 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -34,7 +34,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const { return true; }
+ bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
bool hasFP(const MachineFunction &MF) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
@@ -43,9 +43,8 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 5870d8a87004..f35c96dadcee 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -34,8 +34,7 @@ using namespace llvm;
// Handle the Pass registration stuff necessary to use TargetData's.
// Register the default SparcV9 implementation...
-static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false,
- true);
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true);
char TargetData::ID = 0;
//===----------------------------------------------------------------------===//
@@ -98,8 +97,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
//===----------------------------------------------------------------------===//
TargetAlignElem
-TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width) {
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
TargetAlignElem retval;
retval.AlignType = align_type;
@@ -197,10 +196,10 @@ void TargetData::init(StringRef Desc) {
}
unsigned Size = getInt(Specifier.substr(1));
Split = Token.split(':');
- unsigned char ABIAlign = getInt(Split.first) / 8;
+ unsigned ABIAlign = getInt(Split.first) / 8;
Split = Split.second.split(':');
- unsigned char PrefAlign = getInt(Split.first) / 8;
+ unsigned PrefAlign = getInt(Split.first) / 8;
if (PrefAlign == 0)
PrefAlign = ABIAlign;
setAlignment(AlignType, ABIAlign, PrefAlign, Size);
@@ -227,19 +226,19 @@ void TargetData::init(StringRef Desc) {
///
/// @note This has to exist, because this is a pass, but it should never be
/// used.
-TargetData::TargetData() : ImmutablePass(&ID) {
+TargetData::TargetData() : ImmutablePass(ID) {
report_fatal_error("Bad TargetData ctor used. "
"Tool did not specify a TargetData to use?");
}
TargetData::TargetData(const Module *M)
- : ImmutablePass(&ID) {
+ : ImmutablePass(ID) {
init(M->getDataLayout());
}
void
-TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width) {
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
if (Alignments[i].AlignType == align_type &&
@@ -455,15 +454,6 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
case Type::StructTyID:
// Get the layout annotation... which is lazily created on demand.
return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
- case Type::UnionTyID: {
- const UnionType *UnTy = cast<UnionType>(Ty);
- uint64_t Size = 0;
- for (UnionType::element_iterator i = UnTy->element_begin(),
- e = UnTy->element_end(); i != e; ++i) {
- Size = std::max(Size, getTypeSizeInBits(*i));
- }
- return Size;
- }
case Type::IntegerTyID:
return cast<IntegerType>(Ty)->getBitWidth();
case Type::VoidTyID:
@@ -496,7 +486,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
== false) for the requested type \a Ty.
*/
-unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
int AlignType = -1;
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
@@ -518,18 +508,7 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
// Get the layout annotation... which is lazily created on demand.
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
- return std::max(Align, (unsigned)Layout->getAlignment());
- }
- case Type::UnionTyID: {
- const UnionType *UnTy = cast<UnionType>(Ty);
- unsigned Align = 1;
-
- // Unions need the maximum alignment of all their entries
- for (UnionType::element_iterator i = UnTy->element_begin(),
- e = UnTy->element_end(); i != e; ++i) {
- Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref));
- }
- return Align;
+ return std::max(Align, Layout->getAlignment());
}
case Type::IntegerTyID:
case Type::VoidTyID:
@@ -556,18 +535,18 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
abi_or_pref, Ty);
}
-unsigned char TargetData::getABITypeAlignment(const Type *Ty) const {
+unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
return getAlignment(Ty, true);
}
/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
/// an integer type of the specified bitwidth.
-unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
}
-unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
if (Alignments[i].AlignType == STACK_ALIGN)
return Alignments[i].ABIAlign;
@@ -575,12 +554,12 @@ unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
return getABITypeAlignment(Ty);
}
-unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
return getAlignment(Ty, false);
}
-unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
- unsigned Align = (unsigned) getPrefTypeAlignment(Ty);
+unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
return Log2_32(Align);
}
@@ -615,18 +594,13 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
// Update Ty to refer to current element
Ty = STy->getElementType(FieldNo);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) {
- unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
-
- // Offset into union is canonically 0, but type changes
- Ty = UnTy->getElementType(FieldNo);
} else {
// Update Ty to refer to current element
Ty = cast<SequentialType>(Ty)->getElementType();
// Get the array index and the size of each array element.
if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
- Result += arrayIdx * (int64_t)getTypeAllocSize(Ty);
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
}
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 47c91df1400e..705b1c097e55 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -30,7 +30,8 @@ namespace llvm {
bool NoFramePointerElimNonLeaf;
bool NoExcessFPPrecision;
bool UnsafeFPMath;
- bool FiniteOnlyFPMathOption;
+ bool NoInfsFPMath;
+ bool NoNaNsFPMath;
bool HonorSignDependentRoundingFPMathOption;
bool UseSoftFloat;
FloatABI::ABIType FloatABIType;
@@ -80,9 +81,14 @@ EnableUnsafeFPMath("enable-unsafe-fp-math",
cl::location(UnsafeFPMath),
cl::init(false));
static cl::opt<bool, true>
-EnableFiniteOnlyFPMath("enable-finite-only-fp-math",
- cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"),
- cl::location(FiniteOnlyFPMathOption),
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::location(NoInfsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::location(NoNaNsFPMath),
cl::init(false));
static cl::opt<bool, true>
EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
@@ -290,12 +296,6 @@ namespace llvm {
/// result is "less precise" than doing those operations individually.
bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
- /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
- /// option is specified on the command line. If this returns false (default),
- /// the code generator is not allowed to assume that FP arithmetic arguments
- /// and results are never NaNs or +-Infs.
- bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; }
-
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool HonorSignDependentRoundingFPMath() {
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 49bfad54136d..55f222c7c1c9 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -63,7 +63,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// getAllocatableSetForRC - Toggle the bits that represent allocatable
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
- const TargetRegisterClass *RC, BitVector &R){
+ const TargetRegisterClass *RC, BitVector &R){
for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
E = RC->allocation_order_end(MF); I != E; ++I)
R.set(*I);
@@ -74,12 +74,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
BitVector Allocatable(NumRegs);
if (RC) {
getAllocatableSetForRC(MF, RC, Allocatable);
- return Allocatable;
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ getAllocatableSetForRC(MF, *I, Allocatable);
}
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- getAllocatableSetForRC(MF, *I, Allocatable);
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable ^= Reserved & Allocatable;
+
return Allocatable;
}
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f1e66ab9d2c3..f8588d818b75 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -9,6 +9,8 @@
#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -19,6 +21,7 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
@@ -28,6 +31,7 @@ struct X86Operand;
class X86ATTAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
protected:
unsigned Is64Bit : 1;
@@ -37,8 +41,6 @@ private:
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -48,13 +50,14 @@ private:
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- void InstructionCleanup(MCInst &Inst);
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst);
- /// @name Auto-generated Match Functions
+ /// @name Auto-generated Matcher Functions
/// {
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
+ unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
bool MatchInstructionImpl(
const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
@@ -62,27 +65,32 @@ private:
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(TM) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<X86Subtarget>()));
+ }
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
+
class X86_32ATTAsmParser : public X86ATTAsmParser {
public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = false;
}
};
class X86_64ATTAsmParser : public X86ATTAsmParser {
public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = true;
}
};
@@ -90,7 +98,7 @@ public:
} // end anonymous namespace
/// @name Auto-generated Match Functions
-/// {
+/// {
static unsigned MatchRegisterName(StringRef Name);
@@ -109,7 +117,7 @@ struct X86Operand : public MCParsedAsmOperand {
} Kind;
SMLoc StartLoc, EndLoc;
-
+
union {
struct {
const char *Data;
@@ -141,6 +149,8 @@ struct X86Operand : public MCParsedAsmOperand {
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ virtual void dump(raw_ostream &OS) const {}
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -185,7 +195,7 @@ struct X86Operand : public MCParsedAsmOperand {
bool isToken() const {return Kind == Token; }
bool isImm() const { return Kind == Immediate; }
-
+
bool isImmSExti16i8() const {
if (!isImm())
return false;
@@ -260,10 +270,6 @@ struct X86Operand : public MCParsedAsmOperand {
!getMemIndexReg() && getMemScale() == 1;
}
- bool isNoSegMem() const {
- return Kind == Memory && !getMemSegReg();
- }
-
bool isReg() const { return Kind == Register; }
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
@@ -298,14 +304,6 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
- void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
- assert((N == 4) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
- Inst.addOperand(MCOperand::CreateImm(getMemScale()));
- Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- addExpr(Inst, getMemDisp());
- }
-
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
X86Operand *Res = new X86Operand(Token, Loc, Loc);
Res->Tok.Data = Str.data();
@@ -376,13 +374,19 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
// FIXME: Validate register for the current architecture; we have to do
// validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
-
+
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
+ // can be also checked.
+ if (RegNo == X86::RIZ && !Is64Bit)
+ return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+
// Parse %st(1) and "%st" as "%st(0)"
if (RegNo == 0 && Tok.getString() == "st") {
RegNo = X86::ST0;
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
-
+
// Check to see if we have '(4)' after %st.
if (getLexer().isNot(AsmToken::LParen))
return false;
@@ -403,15 +407,15 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
case 7: RegNo = X86::ST7; break;
default: return Error(IntTok.getLoc(), "invalid stack index");
}
-
+
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
-
+
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat ')'
return false;
}
-
+
// If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (RegNo == 0 && Tok.getString().size() == 3 &&
@@ -426,14 +430,14 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
}
-
+
if (RegNo != 0) {
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat it.
return false;
}
}
-
+
if (RegNo == 0)
return Error(Tok.getLoc(), "invalid register name");
@@ -452,13 +456,17 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
unsigned RegNo;
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
-
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
-
-
+
+
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -477,7 +485,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
-
+
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
@@ -486,7 +494,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
@@ -495,7 +503,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
-
+
// Eat the '('.
Parser.Lex();
} else {
@@ -503,17 +511,17 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// so we have to eat the ( to see beyond it.
SMLoc LParenLoc = Parser.getTok().getLoc();
Parser.Lex(); // Eat the '('.
-
+
if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
// Nothing to do here, fall into the code below with the '(' part of the
// memory operand consumed.
} else {
SMLoc ExprEnd;
-
+
// It must be an parenthesized expression, parse it now.
if (getParser().ParseParenExpression(Disp, ExprEnd))
return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
@@ -522,21 +530,25 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
-
+
// Eat the '('.
Parser.Lex();
}
}
-
+
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
-
+
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(BaseReg, L, L)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(L, "eiz and riz can only be used as index registers");
+ return 0;
+ }
}
-
+
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -545,11 +557,11 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// correctly.
//
// Not that even though it would be completely consistent to support syntax
- // like "1(%eax,,1)", the assembler doesn't.
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(IndexReg, L, L)) return 0;
-
+
if (getLexer().isNot(AsmToken::RParen)) {
// Parse the scale amount:
// ::= ',' [scale-expression]
@@ -566,7 +578,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
int64_t ScaleVal;
if (getParser().ParseAbsoluteExpression(ScaleVal))
return 0;
-
+
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
@@ -576,19 +588,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
}
} else if (getLexer().isNot(AsmToken::RParen)) {
- // Otherwise we have the unsupported form of a scale amount without an
+ // A scale amount without an index is ignored.
// index.
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
if (getParser().ParseAbsoluteExpression(Value))
return 0;
-
- Error(Loc, "cannot have scale factor without index register");
- return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
}
}
-
+
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
@@ -596,7 +609,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
-
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
@@ -743,6 +756,23 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
}
+
+ // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
+ if (PatchedName.startswith("vpclmul")) {
+ unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
+ PatchedName.slice(7, PatchedName.size() - 2))
+ .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
+ .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
+ .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
+ .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
+ .Default(~0U);
+ if (CLMULQuadWordSelect != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
+ getParser().getContext());
+ assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
+ PatchedName = "vpclmulqdq";
+ }
+ }
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
@@ -785,6 +815,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
+ // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+
// FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
// "f{mul*,add*,sub*,div*} $op"
if ((Name.startswith("fmul") || Name.startswith("fadd") ||
@@ -796,6 +840,16 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 2);
}
+ // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
+ // B".
+ if (Name.startswith("imul") && Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[1])->isImm() &&
+ static_cast<X86Operand*>(Operands.back())->isReg()) {
+ X86Operand *Op = static_cast<X86Operand*>(Operands.back());
+ Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
+ Op->getEndLoc()));
+ }
+
return false;
}
@@ -819,7 +873,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
@@ -831,82 +885,32 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
-/// imm operand, to having "rm" or "mr" operands with the offset in the disp
-/// field.
-static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
- bool isMR) {
- MCOperand Disp = Inst.getOperand(0);
-
- // Start over with an empty instruction.
- Inst = MCInst();
- Inst.setOpcode(Opc);
-
- if (!isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-
- // Add the mem operand.
- Inst.addOperand(MCOperand::CreateReg(0)); // Segment
- Inst.addOperand(MCOperand::CreateImm(1)); // Scale
- Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
- Inst.addOperand(Disp); // Displacement
- Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
-
- if (isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-}
-
-// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
-// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
-// proper mechanism for supporting (ambiguous) feature dependent instructions.
-void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
- if (!Is64Bit) return;
-
- switch (Inst.getOpcode()) {
- case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
- case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
- case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
- case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
- case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
- case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
- case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
- case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
-
- // moffset instructions are x86-32 only.
- case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
- case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
- case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
- case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
- case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
- case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
- }
-}
bool
-X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
+X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*>
&Operands,
MCInst &Inst) {
+ assert(!Operands.empty() && "Unexpect empty operand list!");
+
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
// First, try a direct match.
if (!MatchInstructionImpl(Operands, Inst))
return false;
- // Ignore anything which is obviously not a suffix match.
- if (Operands.size() == 0)
- return true;
- X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
- if (!Op->isToken() || Op->getToken().size() > 15)
- return true;
-
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
// type. However, that requires substantially more matcher support than the
// following hack.
// Change the operand to point to a temporary token.
- char Tmp[16];
StringRef Base = Op->getToken();
- memcpy(Tmp, Base.data(), Base.size());
- Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
// Check for the various suffix matches.
Tmp[Base.size()] = 'b';
@@ -928,6 +932,38 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
return false;
// Otherwise, the match failed.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (MatchB + MatchW + MatchL + MatchQ != 4) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (!MatchB)
+ MatchChars[NumMatches++] = 'b';
+ if (!MatchW)
+ MatchChars[NumMatches++] = 'w';
+ if (!MatchL)
+ MatchChars[NumMatches++] = 'l';
+ if (!MatchQ)
+ MatchChars[NumMatches++] = 'q';
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str());
+ } else {
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+ }
+
return true;
}
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index b70a587ec4e2..033973eeeff9 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -2,8 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMX86AsmPrinter
X86ATTInstPrinter.cpp
- X86AsmPrinter.cpp
X86IntelInstPrinter.cpp
- X86MCInstLower.cpp
+ X86InstComments.cpp
)
add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index f2cdb5ba55eb..554b96c96e0e 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -31,6 +32,10 @@ using namespace llvm;
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
index 3be4bae5bec2..eb986643014c 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -56,6 +56,9 @@ public:
void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/AsmPrinter/X86InstComments.cpp
new file mode 100644
index 000000000000..da9d5a3579e5
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86InstComments.cpp
@@ -0,0 +1,232 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "X86GenInstrNames.inc"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "../X86ShuffleDecode.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<unsigned, 8> ShuffleMask;
+ const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(2, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(2, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ DecodeUNPCKLPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ DecodeUNPCKLPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ DecodeUNPCKHPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ DecodeUNPCKHPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ }
+
+
+ // If this was a shuffle operation, print the shuffle mask.
+ if (!ShuffleMask.empty()) {
+ if (DestName == 0) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e &&
+ (int)ShuffleMask[i] >= 0 &&
+ (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+ }
+
+}
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/AsmPrinter/X86InstComments.h
new file mode 100644
index 000000000000..6b86db4f9e5c
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+ class MCInst;
+ class raw_ostream;
+ void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
index a632047f6592..5625b0ea618f 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -30,6 +31,10 @@ using namespace llvm;
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
index 4d680744dd60..6f120322742b 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -64,6 +64,10 @@ public:
O << "XMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "DWORD PTR ";
printMemReference(MI, OpNo, O);
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 133482036ce1..e9399f5c8322 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -18,23 +18,24 @@ tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
set(sources
SSEDomainFix.cpp
X86AsmBackend.cpp
- X86CodeEmitter.cpp
+ X86AsmPrinter.cpp
X86COFFMachineModuleInfo.cpp
+ X86CodeEmitter.cpp
X86ELFWriterInfo.cpp
+ X86FastISel.cpp
X86FloatingPoint.cpp
- X86FloatingPointRegKill.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
X86MCAsmInfo.cpp
X86MCCodeEmitter.cpp
+ X86MCInstLower.cpp
X86RegisterInfo.cpp
+ X86SelectionDAGInfo.cpp
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
- X86FastISel.cpp
- X86SelectionDAGInfo.cpp
)
if( CMAKE_CL_64 )
@@ -49,4 +50,3 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt
index be28e8b394a4..39efd2dbcf1a 100644
--- a/lib/Target/X86/README-FPStack.txt
+++ b/lib/Target/X86/README-FPStack.txt
@@ -27,8 +27,8 @@ def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
//===---------------------------------------------------------------------===//
-The FP stackifier needs to be global. Also, it should handle simple permutates
-to reduce number of shuffle instructions, e.g. turning:
+The FP stackifier should handle simple permutates to reduce number of shuffle
+instructions, e.g. turning:
fld P -> fld Q
fld Q fld P
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index b6aba93f3738..f96b22f1e204 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -2,8 +2,46 @@
// Random ideas for the X86 backend: SSE-specific stuff.
//===---------------------------------------------------------------------===//
-- Consider eliminating the unaligned SSE load intrinsics, replacing them with
- unaligned LLVM load instructions.
+//===---------------------------------------------------------------------===//
+
+SSE Variable shift can be custom lowered to something like this, which uses a
+small table + unaligned load + shuffle instead of going through memory.
+
+__m128i_shift_right:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+
+...
+__m128i shift_right(__m128i value, unsigned long offset) {
+ return _mm_shuffle_epi8(value,
+ _mm_loadu_si128((__m128 *) (___m128i_shift_right + offset)));
+}
+
+//===---------------------------------------------------------------------===//
+
+SSE has instructions for doing operations on complex numbers, we should pattern
+match them. Compiling this:
+
+_Complex float f32(_Complex float A, _Complex float B) {
+ return A+B;
+}
+
+into:
+
+_f32:
+ movdqa %xmm0, %xmm2
+ addss %xmm1, %xmm2
+ pshufd $16, %xmm2, %xmm2
+ pshufd $1, %xmm1, %xmm1
+ pshufd $1, %xmm0, %xmm0
+ addss %xmm1, %xmm0
+ pshufd $16, %xmm0, %xmm1
+ movdqa %xmm2, %xmm0
+ unpcklps %xmm1, %xmm0
+ ret
+
+seems silly.
+
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index efc0cd82f23e..a305ae6ec550 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1135,13 +1135,6 @@ void test(double *P) {
//===---------------------------------------------------------------------===//
-handling llvm.memory.barrier on pre SSE2 cpus
-
-should generate:
-lock ; mov %esp, %esp
-
-//===---------------------------------------------------------------------===//
-
The generated code on x86 for checking for signed overflow on a multiply the
obvious way is much longer than it needs to be.
@@ -1870,3 +1863,100 @@ The code produced by gcc is 3 bytes shorter. This sort of construct often
shows up with bitfields.
//===---------------------------------------------------------------------===//
+
+Take the following C code:
+int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %tmp = xor i32 %b, %a ; <i32> [#uses=1]
+ %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1]
+ %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1]
+ %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
+
+And the following x86 code:
+ xorl %esi, %edi
+ testb $-1, %dil
+ sete %al
+ movzbl %al, %eax
+ ret
+
+A cmpb instead of the xorl+testb would be one instruction shorter.
+
+//===---------------------------------------------------------------------===//
+
+Given the following C code:
+int f(int a, int b) { return (signed char)a == (signed char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %sext = shl i32 %a, 24 ; <i32> [#uses=1]
+ %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1]
+ %sext6 = shl i32 %b, 24 ; <i32> [#uses=1]
+ %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1]
+ %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1]
+ %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
+
+And the following x86 code:
+ movsbl %sil, %eax
+ movsbl %dil, %ecx
+ cmpl %eax, %ecx
+ sete %al
+ movzbl %al, %eax
+ ret
+
+
+It should be possible to eliminate the sign extensions.
+
+//===---------------------------------------------------------------------===//
+
+LLVM misses a load+store narrowing opportunity in this code:
+
+%struct.bf = type { i64, i16, i16, i32 }
+
+@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2]
+
+define void @t1() nounwind ssp {
+entry:
+ %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+ %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1]
+ %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2]
+ %3 = load i32* %2, align 1 ; <i32> [#uses=1]
+ %4 = and i32 %3, -65537 ; <i32> [#uses=1]
+ store i32 %4, i32* %2, align 1
+ %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+ %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1]
+ %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2]
+ %8 = load i32* %7, align 1 ; <i32> [#uses=1]
+ %9 = and i32 %8, -131073 ; <i32> [#uses=1]
+ store i32 %9, i32* %7, align 1
+ ret void
+}
+
+LLVM currently emits this:
+
+ movq bfi(%rip), %rax
+ andl $-65537, 8(%rax)
+ movq bfi(%rip), %rax
+ andl $-131073, 8(%rax)
+ ret
+
+It could narrow the loads and stores to emit this:
+
+ movq bfi(%rip), %rax
+ andb $-2, 10(%rax)
+ movq bfi(%rip), %rax
+ andb $-3, 10(%rax)
+ ret
+
+The trouble is that there is a TokenFactor between the store and the
+load, making it non-trivial to determine if there's anything between
+the load and the store which would prohibit narrowing.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index dab070e1febd..13680c592e01 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -115,7 +115,7 @@ class SSEDomainFixPass : public MachineFunctionPass {
unsigned Distance;
public:
- SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+ SSEDomainFixPass() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 677781d3730e..27e88505150b 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -49,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
/// crossings.
FunctionPass *createSSEDomainFixPass();
-/// createX87FPRegKillInserterPass - This function returns a pass which
-/// inserts FP_REG_KILL instructions where needed.
-///
-FunctionPass *createX87FPRegKillInserterPass();
-
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a53f973c1c43..a19f1acffaca 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -67,6 +67,8 @@ def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
"Enable AVX instructions">;
+def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
+ "Enable carry-less multiplication instructions">;
def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
@@ -180,8 +182,6 @@ include "X86CallingConv.td"
// Currently the X86 assembly parser only supports ATT syntax.
def ATTAsmParser : AsmParser {
string AsmParserClassName = "ATTAsmParser";
- string AsmParserInstCleanup = "InstructionCleanup";
- string MatchInstructionName = "MatchInstructionImpl";
int Variant = 0;
// Discard comments in assembly strings.
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 2cf65c11f94a..69dc967f9d88 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -11,9 +11,11 @@
#include "X86.h"
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/ELFObjectWriter.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MachObjectWriter.h"
@@ -190,10 +192,6 @@ public:
HasScatteredSymbols = true;
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return 0;
- }
-
bool isVirtualSection(const MCSection &Section) const {
const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
return SE.getType() == MCSectionELF::SHT_NOBITS;;
@@ -204,12 +202,43 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_32AsmBackend(const Target &T)
: ELFX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return new ELFObjectWriter(OS, /*Is64Bit=*/false,
+ /*IsLittleEndian=*/true,
+ /*HasRelocationAddend=*/false);
+ }
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_64AsmBackend(const Target &T)
: ELFX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return new ELFObjectWriter(OS, /*Is64Bit=*/true,
+ /*IsLittleEndian=*/true,
+ /*HasRelocationAddend=*/true);
+ }
+};
+
+class WindowsX86AsmBackend : public X86AsmBackend {
+ bool Is64Bit;
+public:
+ WindowsX86AsmBackend(const Target &T, bool is64Bit)
+ : X86AsmBackend(T)
+ , Is64Bit(is64Bit) {
+ HasScatteredSymbols = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createWinCOFFObjectWriter(OS, Is64Bit);
+ }
+
+ bool isVirtualSection(const MCSection &Section) const {
+ const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section);
+ return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ }
};
class DarwinX86AsmBackend : public X86AsmBackend {
@@ -290,6 +319,10 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_32AsmBackend(T);
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return new WindowsX86AsmBackend(T, false);
default:
return new ELFX86_32AsmBackend(T);
}
@@ -300,6 +333,10 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return new WindowsX86AsmBackend(T, true);
default:
return new ELFX86_64AsmBackend(T);
}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 08e6486d5b7a..20110ad788cd 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86ATTInstPrinter.h"
-#include "X86IntelInstPrinter.h"
+#include "AsmPrinter/X86ATTInstPrinter.h"
+#include "AsmPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
@@ -24,6 +24,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -35,6 +36,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
@@ -218,6 +220,10 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ printOperand(MI, OpNo, O);
+ return;
case MachineOperand::MO_Immediate:
O << MO.getImm();
return;
@@ -655,6 +661,47 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &O) {
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+}
+
+
//===----------------------------------------------------------------------===//
// Target Registry Stuff
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index b5a7f8dc321a..e61be66c75a2 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -14,9 +14,9 @@
#ifndef X86ASMPRINTER_H
#define X86ASMPRINTER_H
-#include "../X86.h"
-#include "../X86MachineFunctionInfo.h"
-#include "../X86TargetMachine.h"
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a6a1e4e573cf..e3409effc318 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -33,13 +33,19 @@ def RetCC_X86Common : CallingConv<[
CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
-
- // Vector types are returned in XMM0 and XMM1, when they fit. XMMM2 and XMM3
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
// registers, it won't have vector types.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+ // 256-bit vectors are returned in YMM0 and XMM1, when they fit. YMM2 and YMM3
+ // can only be used by ABI non-compliant code. This vector type is only
+ // supported while using the AVX target feature.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>,
+
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
@@ -164,11 +170,16 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
+
+ // The first 8 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
+
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80], CCAssignToStack<0, 0>>,
@@ -176,6 +187,10 @@ def CC_X86_64_C : CallingConv<[
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
@@ -271,9 +286,18 @@ def CC_X86_32_Common : CallingConv<[
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+ // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f13669bd741d..824021c0c882 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -53,12 +53,12 @@ namespace {
public:
static char ID;
explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(&ID), II(0), TD(0), TM(tm),
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(false),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
Emitter(X86TargetMachine &tm, CodeEmitter &mce,
const X86InstrInfo &ii, const TargetData &td, bool is64)
- : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm),
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
@@ -146,6 +146,103 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
+/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
+/// size, and 3) use of X86-64 extended registers.
+static unsigned determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const TargetInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+
/// emitPCRelativeBlockAddress - This method keeps track of the information
/// necessary to resolve the address of this block later and emits a dummy
/// value.
@@ -569,7 +666,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
// Handle REX prefix.
if (Is64BitMode) {
- if (unsigned REX = X86InstrInfo::determineREX(MI))
+ if (unsigned REX = determineREX(MI))
MCE.emitByte(0x40 | REX);
}
@@ -605,24 +702,29 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
// base address.
switch (Opcode) {
default:
- llvm_unreachable("psuedo instructions should be removed before code"
+ llvm_unreachable("pseudo instructions should be removed before code"
" emission");
break;
+ // Do nothing for Int_MemBarrier - it's just a comment. Add a debug
+ // to make it slightly easier to see.
+ case X86::Int_MemBarrier:
+ DEBUG(dbgs() << "#MEMBARRIER\n");
+ break;
+
case TargetOpcode::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
if (MI.getOperand(0).getSymbolName()[0])
report_fatal_error("JIT does not support inline asm!");
break;
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::GC_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
-
+
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {
// This emits the "call" portion of this pseudo instruction.
@@ -674,7 +776,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
assert(MO.isImm() && "Unknown RawFrm operand!");
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
+ Opcode == X86::WINCALL64pcrel32) {
// Fix up immediate operand for pc relative calls.
intptr_t Imm = (intptr_t)MO.getImm();
Imm = Imm - MCE.getCurrentPCValue() - 4;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ce1370763b77..0c70eec4827f 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -960,9 +960,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- // Fold the common case of a conditional branch with a comparison.
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block (values defined on other blocks may not have
+ // initialized registers).
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse()) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
// Try to take advantage of fallthrough opportunities.
@@ -1058,10 +1060,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
const MachineInstr &MI = *RI;
if (MI.definesRegister(Reg)) {
- unsigned Src, Dst, SrcSR, DstSR;
-
- if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
- Reg = Src;
+ if (MI.isCopy()) {
+ Reg = MI.getOperand(1).getReg();
continue;
}
@@ -1648,15 +1648,26 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
MachineInstrBuilder MIB;
if (CalleeOp) {
// Register-indirect call.
- unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64r;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addReg(CalleeOp);
} else {
// Direct call.
assert(GV && "Not a direct call");
- unsigned CallOpc =
- Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64pcrel32;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index cee4ad70201a..e6ebf669587d 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -8,23 +8,18 @@
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
-// virtual registers into register stack instructions. This pass uses live
+// pseudo registers into register stack instructions. This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
-// This pass is hampered by the lack of decent CFG manipulation routines for
-// machine code. In particular, this wants to be able to split critical edges
-// as necessary, traverse the machine basic block CFG in depth-first order, and
-// allow there to be multiple machine basic blocks for each LLVM basicblock
-// (needed for critical edge splitting).
+// The x87 hardware tracks liveness of the stack registers, so it is necessary
+// to implement exact liveness tracking between basic blocks. The CFG edges are
+// partitioned into bundles where the same FP registers must be live in
+// identical stack positions. Instructions are inserted at the end of each basic
+// block to rearrange the live registers to match the outgoing bundle.
//
-// In particular, this pass currently barfs on critical edges. Because of this,
-// it requires the instruction selector to insert FP_REG_KILL instructions on
-// the exits of any basic block that has critical edges going from it, or which
-// branch to a critical basic block.
-//
-// FIXME: this is not implemented yet. The stackifier pass only works on local
-// basic blocks.
+// This approach avoids splitting critical edges at the potential cost of more
+// live register shuffling instructions when critical edges are present.
//
//===----------------------------------------------------------------------===//
@@ -32,6 +27,7 @@
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -54,7 +50,12 @@ STATISTIC(NumFP , "Number of floating point instructions");
namespace {
struct FPS : public MachineFunctionPass {
static char ID;
- FPS() : MachineFunctionPass(&ID) {}
+ FPS() : MachineFunctionPass(ID) {
+ // This is really only to keep valgrind quiet.
+ // The logic in isLive() is too much for it.
+ memset(Stack, 0, sizeof(Stack));
+ memset(RegMap, 0, sizeof(RegMap));
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -69,11 +70,71 @@ namespace {
private:
const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Two CFG edges are related if they leave the same block, or enter the same
+ // block. The transitive closure of an edge under this relation is a
+ // LiveBundle. It represents a set of CFG edges where the live FP stack
+ // registers must be allocated identically in the x87 stack.
+ //
+ // A LiveBundle is usually all the edges leaving a block, or all the edges
+ // entering a block, but it can contain more edges if critical edges are
+ // present.
+ //
+ // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
+ // but the exact mapping of FP registers to stack slots is fixed later.
+ struct LiveBundle {
+ // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
+ unsigned Mask;
+
+ // Number of pre-assigned live registers in FixStack. This is 0 when the
+ // stack order has not yet been fixed.
+ unsigned FixCount;
+
+ // Assigned stack order for live-in registers.
+ // FixStack[i] == getStackEntry(i) for all i < FixCount.
+ unsigned char FixStack[8];
+
+ LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
+
+ // Have the live registers been assigned a stack order yet?
+ bool isFixed() const { return !Mask || FixCount; }
+ };
+
+ // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
+ // with no live FP registers.
+ SmallVector<LiveBundle, 8> LiveBundles;
+
+ // Map each MBB in the current function to an (ingoing, outgoing) index into
+ // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
+ // and are not actually stored in the map.
+ DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
+
+ // Return a bitmask of FP registers in block's live-in list.
+ unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ unsigned Mask = 0;
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I - X86::FP0;
+ if (Reg < 8)
+ Mask |= 1 << Reg;
+ }
+ return Mask;
+ }
+
+ // Partition all the CFG edges into LiveBundles.
+ void bundleCFG(MachineFunction &MF);
+
MachineBasicBlock *MBB; // Current basic block
unsigned Stack[8]; // FP<n> Registers in each stack slot...
unsigned RegMap[8]; // Track which stack slot contains each register
unsigned StackTop; // The current top of the FP stack.
+ // Set up our stack model to match the incoming registers to MBB.
+ void setupBlockStack();
+
+ // Shuffle live registers to match the expectations of successor blocks.
+ void finishBlockStack();
+
void dumpStack() const {
dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
@@ -82,27 +143,36 @@ namespace {
}
dbgs() << "\n";
}
- private:
- /// isStackEmpty - Return true if the FP stack is empty.
- bool isStackEmpty() const {
- return StackTop == 0;
- }
-
- // getSlot - Return the stack slot number a particular register number is
- // in.
+
+ /// getSlot - Return the stack slot number a particular register number is
+ /// in.
unsigned getSlot(unsigned RegNo) const {
assert(RegNo < 8 && "Regno out of range!");
return RegMap[RegNo];
}
- // getStackEntry - Return the X86::FP<n> register in register ST(i).
+ /// isLive - Is RegNo currently live in the stack?
+ bool isLive(unsigned RegNo) const {
+ unsigned Slot = getSlot(RegNo);
+ return Slot < StackTop && Stack[Slot] == RegNo;
+ }
+
+ /// getScratchReg - Return an FP register that is not currently in use.
+ unsigned getScratchReg() {
+ for (int i = 7; i >= 0; --i)
+ if (!isLive(i))
+ return i;
+ llvm_unreachable("Ran out of scratch FP registers");
+ }
+
+ /// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
assert(STi < StackTop && "Access past stack top!");
return Stack[StackTop-1-STi];
}
- // getSTReg - Return the X86::ST(i) register which contains the specified
- // FP<RegNo> register.
+ /// getSTReg - Return the X86::ST(i) register which contains the specified
+ /// FP<RegNo> register.
unsigned getSTReg(unsigned RegNo) const {
return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
}
@@ -117,10 +187,9 @@ namespace {
bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
if (isAtTop(RegNo)) return;
-
+
unsigned STReg = getSTReg(RegNo);
unsigned RegOnTop = getStackEntry(0);
@@ -137,24 +206,37 @@ namespace {
}
void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
- DebugLoc dl = I->getDebugLoc();
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
unsigned STReg = getSTReg(RegNo);
pushReg(AsReg); // New register on top of stack
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
- // popStackAfter - Pop the current value off of the top of the FP stack
- // after the specified instruction.
+ /// popStackAfter - Pop the current value off of the top of the FP stack
+ /// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
- // freeStackSlotAfter - Free the specified register from the register stack,
- // so that it is no longer in a register. If the register is currently at
- // the top of the stack, we just pop the current instruction, otherwise we
- // store the current top-of-stack into the specified slot, then pop the top
- // of stack.
+ /// freeStackSlotAfter - Free the specified register from the register
+ /// stack, so that it is no longer in a register. If the register is
+ /// currently at the top of the stack, we just pop the current instruction,
+ /// otherwise we store the current top-of-stack into the specified slot,
+ /// then pop the top of stack.
void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+ /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
+ /// instruction.
+ MachineBasicBlock::iterator
+ freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
+
+ /// Adjust the live registers to be the set in Mask.
+ void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
+
+ /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is
+ /// st(0), FP reg FixStack[1] is st(1) etc.
+ void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
+ MachineBasicBlock::iterator I);
+
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
void handleZeroArgFP(MachineBasicBlock::iterator &I);
@@ -181,7 +263,6 @@ static unsigned getFPReg(const MachineOperand &MO) {
return Reg - X86::FP0;
}
-
/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
/// register references into FP stack references.
///
@@ -201,6 +282,10 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (!FPIsUsed) return false;
TII = MF.getTarget().getInstrInfo();
+
+ // Prepare cross-MBB liveness.
+ bundleCFG(MF);
+
StackTop = 0;
// Process the function in depth first order so that we process at least one
@@ -215,16 +300,111 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
Changed |= processBasicBlock(MF, **I);
// Process any unreachable blocks in arbitrary order now.
- if (MF.size() == Processed.size())
- return Changed;
+ if (MF.size() != Processed.size())
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
+ BlockBundle.clear();
+ LiveBundles.clear();
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- if (Processed.insert(BB))
- Changed |= processBasicBlock(MF, *BB);
-
return Changed;
}
+/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
+/// live-out sets for the FP registers. Consistent means that the set of
+/// registers live-out from a block is identical to the live-in set of all
+/// successors. This is not enforced by the normal live-in lists since
+/// registers may be implicitly defined, or not used by all successors.
+void FPS::bundleCFG(MachineFunction &MF) {
+ assert(LiveBundles.empty() && "Stale data in LiveBundles");
+ assert(BlockBundle.empty() && "Stale data in BlockBundle");
+ SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
+
+ // LiveBundle[0] is the empty live-in set.
+ LiveBundles.resize(1);
+
+ // First gather the actual live-in masks for all MBBs.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ const unsigned Mask = calcLiveInMask(MBB);
+ if (!Mask)
+ continue;
+ // Ingoing bundle index.
+ unsigned &Idx = BlockBundle[MBB].first;
+ // Already assigned an ingoing bundle?
+ if (Idx)
+ continue;
+ // Allocate a new LiveBundle struct for this block's live-ins.
+ const unsigned BundleIdx = Idx = LiveBundles.size();
+ DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
+ << MBB->getNumber());
+ LiveBundles.push_back(Mask);
+ LiveBundle &Bundle = LiveBundles.back();
+
+ // Make sure all predecessors have the same live-out set.
+ PropUp.insert(MBB);
+
+ // Keep pushing liveness up and down the CFG until convergence.
+ // Only critical edges cause iteration here, but when they do, multiple
+ // blocks can be assigned to the same LiveBundle index.
+ do {
+ // Assign BundleIdx as liveout from predecessors in PropUp.
+ for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
+ E = PropUp.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
+ LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
+ MachineBasicBlock *PredMBB = *LinkI;
+ // PredMBB's liveout bundle should be set to LIIdx.
+ unsigned &Idx = BlockBundle[PredMBB].second;
+ if (Idx) {
+ assert(Idx == BundleIdx && "Inconsistent CFG");
+ continue;
+ }
+ Idx = BundleIdx;
+ DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
+ // Propagate to siblings.
+ if (PredMBB->succ_size() > 1)
+ PropDown.insert(PredMBB);
+ }
+ }
+ PropUp.clear();
+
+ // Assign BundleIdx as livein to successors in PropDown.
+ for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
+ E = PropDown.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
+ LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
+ MachineBasicBlock *SuccMBB = *LinkI;
+ // LinkMBB's livein bundle should be set to BundleIdx.
+ unsigned &Idx = BlockBundle[SuccMBB].first;
+ if (Idx) {
+ assert(Idx == BundleIdx && "Inconsistent CFG");
+ continue;
+ }
+ Idx = BundleIdx;
+ DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
+ // Propagate to siblings.
+ if (SuccMBB->pred_size() > 1)
+ PropUp.insert(SuccMBB);
+ // Also accumulate the bundle liveness mask from the liveins here.
+ Bundle.Mask |= calcLiveInMask(SuccMBB);
+ }
+ }
+ PropDown.clear();
+ } while (!PropUp.empty());
+ DEBUG({
+ dbgs() << " live:";
+ for (unsigned i = 0; i < 8; ++i)
+ if (Bundle.Mask & (1<<i))
+ dbgs() << " %FP" << i;
+ dbgs() << '\n';
+ });
+ }
+}
+
/// processBasicBlock - Loop over all of the instructions in the basic block,
/// transforming FP instructions into their stack form.
///
@@ -232,10 +412,12 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
bool Changed = false;
MBB = &BB;
+ setupBlockStack();
+
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
uint64_t Flags = MI->getDesc().TSFlags;
-
+
unsigned FPInstClass = Flags & X86II::FPTypeMask;
if (MI->isInlineAsm())
FPInstClass = X86II::SpecialFP;
@@ -302,10 +484,82 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
Changed = true;
}
- assert(isStackEmpty() && "Stack not empty at end of basic block?");
+ finishBlockStack();
+
return Changed;
}
+/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
+/// to match predecessors' live out stack.
+void FPS::setupBlockStack() {
+ DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+ StackTop = 0;
+ const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "Block has no FP live-ins.\n");
+ return;
+ }
+
+ // Depth-first iteration should ensure that we always have an assigned stack.
+ assert(Bundle.isFixed() && "Reached block before any predecessors");
+
+ // Push the fixed live-in registers.
+ for (unsigned i = Bundle.FixCount; i > 0; --i) {
+ MBB->addLiveIn(X86::ST0+i-1);
+ DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
+ << unsigned(Bundle.FixStack[i-1]) << '\n');
+ pushReg(Bundle.FixStack[i-1]);
+ }
+
+ // Kill off unwanted live-ins. This can happen with a critical edge.
+ // FIXME: We could keep these live registers around as zombies. They may need
+ // to be revived at the end of a short block. It might save a few instrs.
+ adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ DEBUG(MBB->dump());
+}
+
+/// finishBlockStack - Revive live-outs that are implicitly defined out of
+/// MBB. Shuffle live registers to match the expected fixed stack of any
+/// predecessors, and ensure that all predecessors are expecting the same
+/// stack.
+void FPS::finishBlockStack() {
+ // The RET handling below takes care of return blocks for us.
+ if (MBB->succ_empty())
+ return;
+
+ DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+
+ unsigned BundleIdx = BlockBundle.lookup(MBB).second;
+ LiveBundle &Bundle = LiveBundles[BundleIdx];
+
+ // We may need to kill and define some registers to match successors.
+ // FIXME: This can probably be combined with the shuffle below.
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ adjustLiveRegs(Bundle.Mask, Term);
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "No live-outs.\n");
+ return;
+ }
+
+ // Has the stack order been fixed yet?
+ DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
+ if (Bundle.isFixed()) {
+ DEBUG(dbgs() << "Shuffling stack to match.\n");
+ shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
+ } else {
+ // Not fixed yet, we get to choose.
+ DEBUG(dbgs() << "Fixing stack order now.\n");
+ Bundle.FixCount = StackTop;
+ for (unsigned i = 0; i < StackTop; ++i)
+ Bundle.FixStack[i] = getStackEntry(i);
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// Efficient Lookup Table Support
//===----------------------------------------------------------------------===//
@@ -318,7 +572,7 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool operator<(unsigned V, const TableEntry &TE) {
+ friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) {
return V < TE.from;
}
};
@@ -597,6 +851,13 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
// Otherwise, store the top of stack into the dead slot, killing the operand
// without having to add in an explicit xchg then pop.
//
+ I = freeStackSlotBefore(++I, FPRegNo);
+}
+
+/// freeStackSlotBefore - Free the specified register without trying any
+/// folding.
+MachineBasicBlock::iterator
+FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
unsigned STReg = getSTReg(FPRegNo);
unsigned OldSlot = getSlot(FPRegNo);
unsigned TopReg = Stack[StackTop-1];
@@ -604,9 +865,90 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
RegMap[TopReg] = OldSlot;
RegMap[FPRegNo] = ~0;
Stack[--StackTop] = ~0;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
- I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg);
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+/// adjustLiveRegs - Kill and revive registers such that exactly the FP
+/// registers with a bit in Mask are live.
+void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
+ unsigned Defs = Mask;
+ unsigned Kills = 0;
+ for (unsigned i = 0; i < StackTop; ++i) {
+ unsigned RegNo = Stack[i];
+ if (!(Defs & (1 << RegNo)))
+ // This register is live, but we don't want it.
+ Kills |= (1 << RegNo);
+ else
+ // We don't need to imp-def this live register.
+ Defs &= ~(1 << RegNo);
+ }
+ assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
+
+ // Produce implicit-defs for free by using killed registers.
+ while (Kills && Defs) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
+ std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
+ std::swap(RegMap[KReg], RegMap[DReg]);
+ Kills &= ~(1 << KReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Kill registers by popping.
+ if (Kills && I != MBB->begin()) {
+ MachineBasicBlock::iterator I2 = llvm::prior(I);
+ for (;;) {
+ unsigned KReg = getStackEntry(0);
+ if (!(Kills & (1 << KReg)))
+ break;
+ DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
+ popStackAfter(I2);
+ Kills &= ~(1 << KReg);
+ }
+ }
+
+ // Manually kill the rest.
+ while (Kills) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
+ freeStackSlotBefore(I, KReg);
+ Kills &= ~(1 << KReg);
+ }
+
+ // Load zeros for all the imp-defs.
+ while(Defs) {
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
+ BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
+ pushReg(DReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Now we should have the correct registers live.
+ DEBUG(dumpStack());
+ assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+}
+
+/// shuffleStackTop - emit fxch instructions before I to shuffle the top
+/// FixCount entries into the order given by FixStack.
+/// FIXME: Is there a better algorithm than insertion sort?
+void FPS::shuffleStackTop(const unsigned char *FixStack,
+ unsigned FixCount,
+ MachineBasicBlock::iterator I) {
+ // Move items into place, starting from the desired stack bottom.
+ while (FixCount--) {
+ // Old register at position FixCount.
+ unsigned OldReg = getStackEntry(FixCount);
+ // Desired register at position FixCount.
+ unsigned Reg = FixStack[FixCount];
+ if (Reg == OldReg)
+ continue;
+ // (Reg st0) (OldReg st0) = (Reg OldReg st0)
+ moveToTop(Reg, I);
+ moveToTop(OldReg, I);
+ }
+ DEBUG(dumpStack());
}
@@ -660,7 +1002,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MI->getOpcode() == X86::ISTT_Fp32m80 ||
MI->getOpcode() == X86::ISTT_Fp64m80 ||
MI->getOpcode() == X86::ST_FpP80m)) {
- duplicateToTop(Reg, 7 /*temp register*/, I);
+ duplicateToTop(Reg, getScratchReg(), I);
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
@@ -1013,8 +1355,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
if (!MI->killsRegister(X86::FP0 + Op0)) {
// Duplicate Op0 into a temporary on the stack top.
- // This actually assumes that FP7 is dead.
- duplicateToTop(Op0, 7, I);
+ duplicateToTop(Op0, getScratchReg(), I);
} else {
// Op0 is killed, so just swap it into position.
moveToTop(Op0, I);
@@ -1034,8 +1375,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
++StackTop;
unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
if (!MI->killsRegister(X86::FP0 + Op0)) {
- // Assume FP6 is not live, use it as a scratch register.
- duplicateToTop(Op0, 6, I);
+ duplicateToTop(Op0, getScratchReg(), I);
moveToTop(RegOnTop, I);
} else if (getSTReg(Op0) != X86::ST1) {
// We have the wrong value at st(1). Shuffle! Untested!
@@ -1119,11 +1459,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::RETI:
// If RET has an FP register use operand, pass the first one in ST(0) and
// the second one in ST(1).
- if (isStackEmpty()) return; // Quick check to see if any are possible.
-
+
// Find the register operands.
unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
-
+ unsigned LiveMask = 0;
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
@@ -1142,12 +1482,18 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
assert(SecondFPRegOp == ~0U && "More than two fp operands!");
SecondFPRegOp = getFPReg(Op);
}
+ LiveMask |= (1 << getFPReg(Op));
// Remove the operand so that later passes don't see it.
MI->RemoveOperand(i);
--i, --e;
}
-
+
+ // We may have been carrying spurious live-ins, so make sure only the returned
+ // registers are left live.
+ adjustLiveRegs(LiveMask, MI);
+ if (!LiveMask) return; // Quick check to see if any are possible.
+
// There are only four possibilities here:
// 1) we are returning a single FP value. In this case, it has to be in
// ST(0) already, so just declare success by removing the value from the
@@ -1173,7 +1519,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
- unsigned NewReg = (FirstFPRegOp+1)%7;
+ unsigned NewReg = getScratchReg();
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
@@ -1197,7 +1543,14 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
}
I = MBB->erase(I); // Remove the pseudo instruction
- --I;
+
+ // We want to leave I pointing to the previous instruction, but what if we
+ // just erased the first instruction?
+ if (I == MBB->begin()) {
+ DEBUG(dbgs() << "Inserting dummy KILL\n");
+ I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ } else
+ --I;
}
// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
deleted file mode 100644
index 2c98b96c510b..000000000000
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- X86FloatingPoint.cpp - FP_REG_KILL inserter -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the pass which inserts FP_REG_KILL instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "x86-codegen"
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "llvm/Instructions.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added");
-
-namespace {
- struct FPRegKiller : public MachineFunctionPass {
- static char ID;
- FPRegKiller() : MachineFunctionPass(&ID) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "X86 FP_REG_KILL inserter";
- }
- };
- char FPRegKiller::ID = 0;
-}
-
-FunctionPass *llvm::createX87FPRegKillInserterPass() {
- return new FPRegKiller();
-}
-
-/// isFPStackVReg - Return true if the specified vreg is from a fp stack
-/// register class.
-static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(RegNo))
- return false;
-
- switch (MRI.getRegClass(RegNo)->getID()) {
- default: return false;
- case X86::RFP32RegClassID:
- case X86::RFP64RegClassID:
- case X86::RFP80RegClassID:
- return true;
- }
-}
-
-
-/// ContainsFPStackCode - Return true if the specific MBB has floating point
-/// stack code, and thus needs an FP_REG_KILL.
-static bool ContainsFPStackCode(MachineBasicBlock *MBB,
- const MachineRegisterInfo &MRI) {
- // Scan the block, looking for instructions that define or use fp stack vregs.
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (!I->getOperand(op).isReg())
- continue;
- if (unsigned Reg = I->getOperand(op).getReg())
- if (isFPStackVReg(Reg, MRI))
- return true;
- }
- }
-
- // Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block, which is a definition of the
- // value.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++ SI) {
- MachineBasicBlock *SuccBB = *SI;
- for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
- I != E; ++I) {
- // All PHI nodes are at the top of the block.
- if (!I->isPHI()) break;
-
- if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
- return true;
- }
- }
-
- return false;
-}
-
-bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
- // If we are emitting FP stack code, scan the basic block to determine if this
- // block defines or uses any FP values. If so, put an FP_REG_KILL instruction
- // before the terminator of the block.
-
- // Note that FP stack instructions are used in all modes for long double,
- // so we always need to do this check.
- // Also note that it's possible for an FP stack register to be live across
- // an instruction that produces multiple basic blocks (SSE CMOV) so we
- // must check all the generated basic blocks.
-
- // Scan all of the machine instructions in these MBBs, checking for FP
- // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
-
- // Fast-path: If nothing is using the x87 registers, we don't need to do
- // any scanning.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
- MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
- MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
- return false;
-
- bool Changed = false;
- MachineFunction::iterator MBBI = MF.begin();
- MachineFunction::iterator EndMBB = MF.end();
- for (; MBBI != EndMBB; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
-
- // If this block returns, ignore it. We don't want to insert an FP_REG_KILL
- // before the return.
- if (!MBB->empty()) {
- MachineBasicBlock::iterator EndI = MBB->end();
- --EndI;
- if (EndI->getDesc().isReturn())
- continue;
- }
-
- // If we find any FP stack code, emit the FP_REG_KILL instruction.
- if (ContainsFPStackCode(MBB, MRI)) {
- BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
- MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
- ++NumFPKill;
- Changed = true;
- }
- }
-
- return Changed;
-}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 72f2bc11d7cc..c5234413aba6 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -171,6 +171,17 @@ namespace {
virtual void PreprocessISelDAG();
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
+
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
@@ -1312,13 +1323,6 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-static SDNode *FindCallStartFromCall(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCall(Node->getOperand(0).getNode());
-}
-
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
@@ -1403,7 +1407,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC16m;
else if (isSub) {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB16mi8;
else
Opc = X86::LOCK_SUB16mi;
@@ -1411,7 +1415,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB16mr;
} else {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD16mi8;
else
Opc = X86::LOCK_ADD16mi;
@@ -1426,7 +1430,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC32m;
else if (isSub) {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB32mi8;
else
Opc = X86::LOCK_SUB32mi;
@@ -1434,7 +1438,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB32mr;
} else {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD32mi8;
else
Opc = X86::LOCK_ADD32mi;
@@ -1450,17 +1454,17 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
else if (isSub) {
Opc = X86::LOCK_SUB64mr;
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_SUB64mi32;
}
} else {
Opc = X86::LOCK_ADD64mr;
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_ADD64mi32;
}
}
@@ -1841,7 +1845,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
- if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
// Look past the truncate if CMP is the only use of it.
N0 = N0.getOperand(0);
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b3c48862898f..95dbb6176687 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,6 +16,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
+#include "X86ShuffleDecode.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/CallingConv.h"
@@ -343,8 +344,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- if (!Subtarget->hasSSE2())
- setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // We may not have a libcall for MEMBARRIER so we should lower this.
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
@@ -837,6 +839,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ // Can turn SHL into an integer multiply.
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+
// i8 and i16 vectors are custom , because the source register and source
// source memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
@@ -866,6 +872,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
@@ -877,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
- //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
//setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom);
//setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
//setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
@@ -1189,6 +1196,50 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+std::pair<const TargetRegisterClass*, uint8_t>
+X86TargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = (Subtarget->is64Bit()
+ ? X86::GR64RegisterClass : X86::GR32RegisterClass);
+ break;
+ case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64:
+ RRC = X86::VR64RegisterClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
+ case MVT::v4f64:
+ RRC = X86::VR128RegisterClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+unsigned
+X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 8 - FPDiff;
+ case X86::VR128RegClassID:
+ return Subtarget->is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
+}
+
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
@@ -1259,6 +1310,19 @@ X86TargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = OutVals[i];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+ // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+ // llvm-gcc has never done it right and no one has noticed, so this
+ // should be OK for now.
+ if (ValVT == MVT::f64 &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
@@ -1276,14 +1340,20 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- EVT ValVT = ValToCopy.getValueType();
if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
- if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
- ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget->hasSSE2())
+ ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+ }
}
}
-
+
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
@@ -1570,6 +1640,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = X86::FR32RegisterClass;
else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
@@ -1937,6 +2009,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (isVarArg && Subtarget->isTargetWin64()) {
+ // Win64 ABI requires argument XMM reg to be copied to the corresponding
+ // shadow reg if callee is a varargs function.
+ unsigned ShadowReg = 0;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
@@ -1990,7 +2075,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
}
- if (Is64Bit && isVarArg) {
+ if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -1999,7 +2084,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// the number of registers, but must be an ubound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
- // FIXME: Verify this on Win64
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
@@ -2165,8 +2249,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (!isTailCall && Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
- // Add an implicit use of AL for x86 vararg functions.
- if (Is64Bit && isVarArg)
+ // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+ if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
if (InFlag.getNode())
@@ -2356,8 +2440,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (RegInfo->needsStackRealignment(MF))
return false;
- // Do not sibcall optimize vararg calls unless the call site is not passing any
- // arguments.
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
if (isVarArg && !Outs.empty())
return false;
@@ -2493,6 +2577,112 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return true;
+ }
+ return false;
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ return DAG.getNode(Opc, dl, VT, V1);
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ return DAG.getNode(Opc, dl, VT, V1, V2,
+ DAG.getConstant(TargetMask, MVT::i8));
+ }
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return DAG.getNode(Opc, dl, VT, V1, V2);
+ }
+ return SDValue();
+}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -3347,18 +3537,27 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 64) { // MMX
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- } else if (HasSSE2) { // SSE2
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- } else { // SSE1
+ } else if (VT.getSizeInBits() == 128) {
+ if (HasSSE2) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ } else if (VT.getSizeInBits() == 256) { // AVX
+ // 256-bit logic and arithmetic instructions in AVX are
+ // all floating-point, no support for integer ops. Default
+ // to emitting fp zeroed vectors then.
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
}
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
@@ -3372,9 +3571,9 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (VT.getSizeInBits() == 64) // MMX
+ if (VT.getSizeInBits() == 64) // MMX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else // SSE
+ else // SSE
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
@@ -3439,9 +3638,8 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
- bool HasSSE2) {
+/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
if (SV->getValueType(0).getVectorNumElements() <= 4)
return SDValue(SV, 0);
@@ -3488,68 +3686,253 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
-/// getNumOfConsecutiveZeros - Return the number of elements in a result of
-/// a shuffle that is zero.
-static
-unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
- bool Low, SelectionDAG &DAG) {
- unsigned NumZeros = 0;
- for (int i = 0; i < NumElems; ++i) {
- unsigned Index = Low ? i : NumElems-i-1;
- int Idx = SVOp->getMaskElt(Index);
- if (Idx < 0) {
- ++NumZeros;
- continue;
- }
- SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
- if (Elt.getNode() && X86::isZeroNode(Elt))
- ++NumZeros;
- else
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
+ if (Depth == 6)
+ return SDValue(); // Limit search depth.
+
+ SDValue V = SDValue(N, 0);
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+
+ // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ Index = SV->getMaskElt(Index);
+
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ int NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ }
+
+ // Recurse into target specific vector shuffles to find scalars.
+ if (isTargetShuffle(Opcode)) {
+ int NumElems = VT.getVectorNumElements();
+ SmallVector<unsigned, 16> ShuffleMask;
+ SDValue ImmN;
+
+ switch(Opcode) {
+ case X86ISD::SHUFPS:
+ case X86ISD::SHUFPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPSMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ DecodePUNPCKHMask(NumElems, ShuffleMask);
break;
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ DecodeUNPCKHPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ DecodePUNPCKLMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ DecodeUNPCKLPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PSHUFD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // The index 0 always comes from the first element of the second source,
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the other positions of the first source vector.
+ unsigned OpNum = (Index == 0) ? 1 : 0;
+ return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+ Depth+1);
+ }
+ default:
+ assert("not implemented for target shuffle node");
+ return SDValue();
+ }
+
+ Index = ShuffleMask[Index];
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ Depth+1);
}
- return NumZeros;
-}
-/// isVectorShift - Returns true if the shuffle can be implemented as a
-/// logical left or right shift of a vector.
-/// FIXME: split into pslldqi, psrldqi, palignr variants.
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
- bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ // Actual nodes that may contain scalar elements
+ if (Opcode == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
- isLeft = true;
- unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
- if (!NumZeros) {
- isLeft = false;
- NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
- if (!NumZeros)
- return false;
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+ return SDValue();
+ }
+
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : DAG.getUNDEF(VT.getVectorElementType());
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+
+ return SDValue();
+}
+
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation which come from a consecutively from a zero. The
+/// search can start in two diferent directions, from left or right.
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+ bool ZerosFromLeft, SelectionDAG &DAG) {
+ int i = 0;
+
+ while (i < NumElems) {
+ unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+ SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ if (!(Elt.getNode() &&
+ (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+ break;
+ ++i;
}
+
+ return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also tell OpNum which source vector operand.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+ int OpIdx, int NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
- for (unsigned i = NumZeros; i < NumElems; ++i) {
- unsigned Val = isLeft ? (i - NumZeros) : i;
- int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
- if (Idx_ < 0)
+
+ for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ int Idx = SVOp->getMaskElt(i);
+ // Ignore undef indicies
+ if (Idx < 0)
continue;
- unsigned Idx = (unsigned) Idx_;
+
if (Idx < NumElems)
SeenV1 = true;
- else {
- Idx -= NumElems;
+ else
SeenV2 = true;
- }
- if (Idx != Val)
+
+ // Only accept consecutive elements from the same vector
+ if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
return false;
}
- if (SeenV1 && SeenV2)
+
+ OpNum = SeenV1 ? 0 : 1;
+ return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ false /* check zeros from right */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // V1 = {X, A, B, C} 0
+ // \ \ \ /
+ // vector_shuffle V1, V2 <1, 2, 3, X>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ 0, // Mask Start Index
+ NumElems-NumZeros-1, // Mask End Index
+ NumZeros, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = false;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ true /* check zeros from left */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // 0 { A, B, X, X } = V2
+ // / \ / /
+ // vector_shuffle V1, V2 <X, X, 4, 5>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ NumZeros, // Mask Start Index
+ NumElems-1, // Mask End Index
+ 0, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
return false;
- ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
+ isLeft = true;
ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
return true;
}
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+ isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+ return true;
+
+ return false;
+}
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
@@ -3779,9 +4162,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- // All zero's are handled with pxor, all one's are handled with pcmpeqd.
- if (ISD::isBuildVectorAllZeros(Op.getNode())
- || ISD::isBuildVectorAllOnes(Op.getNode())) {
+ // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
+ // All one's are handled with pcmpeqd. In AVX, zero's are handled with
+ // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
+ // is present, so AllOnes is ignored.
+ if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+ (Op.getValueType().getSizeInBits() != 256 &&
+ ISD::isBuildVectorAllOnes(Op.getNode()))) {
// Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
@@ -3819,10 +4206,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (NumNonZero == 0) {
- // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NumNonZero == 0)
return DAG.getUNDEF(VT);
- }
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
@@ -3960,7 +4346,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ *this);
if (V.getNode()) return V;
}
@@ -4014,28 +4400,51 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (LD.getNode())
return LD;
- // For SSE 4.1, use inserts into undef.
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
- V[0] = DAG.getUNDEF(VT);
- for (unsigned i = 0; i < NumElems; ++i)
- if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
- V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i));
- return V[0];
+ }
+ return Result;
}
- // Otherwise, expand into a number of unpckl*
- // e.g. for v4f32
+ // Otherwise, expand into a number of unpckl*, start by extending each of
+ // our (non-undef) elements to the full vector width with the element in the
+ // bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
- NumElems >>= 1;
- while (NumElems != 0) {
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
- NumElems >>= 1;
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i) {
+ // If V[i+EltStride] is undef and this is the first round of mixing,
+ // then it is safe to just drop this shuffle: V[i] is already in the
+ // right place, the one element (since it's the first round) being
+ // inserted as undef can be dropped. This isn't safe for successive
+ // rounds because they will permute elements within both vectors.
+ if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+ EltStride == NumElems/2)
+ continue;
+
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ }
+ EltStride >>= 1;
}
return V[0];
}
@@ -4074,10 +4483,10 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
// 2. [ssse3] 1 x pshufb
// 3. [ssse3] 2 x pshufb + 1 x por
// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
-static
-SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const X86TargetLowering &TLI) {
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -4128,7 +4537,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads.find_first();
BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -4187,15 +4596,21 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// If we've eliminated the use of V2, and the new mask is a pshuflw or
// pshufhw, that's as cheap as it gets. Return the new shuffle.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
- return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+ unsigned TargetMask = 0;
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
+ X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ V1 = NewV.getOperand(0);
+ return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
}
}
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
@@ -4262,6 +4677,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
MaskV.push_back(i);
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFLWImmediate(NewV.getNode()),
+ DAG);
}
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
@@ -4284,6 +4705,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFHWImmediate(NewV.getNode()),
+ DAG);
}
// In case BestHi & BestLo were both -1, which means each quadword has a word
@@ -4473,7 +4900,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
EVT NewVT = MaskVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
@@ -4697,6 +5124,129 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
+static bool MayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+ if (MayFoldLoad(V))
+ return true;
+ return false;
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+ bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+ if (HasSSE2 && VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+ // v4f32 or v4i32
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ "unsupported shuffle type");
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ // v4i32 or v4f32
+ return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+ // operand of these instructions is only memory, so check if there's a
+ // potencial load folding here, otherwise use SHUFPS or MOVSD to match the
+ // same masks.
+ bool CanFoldLoad = false;
+
+ // Trivial case, when V2 comes from a load.
+ if (MayFoldVectorLoad(V2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store in isel, example:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So, recognize this potential and also use MOVLPS or MOVLPD
+ if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ CanFoldLoad = true;
+
+ if (CanFoldLoad) {
+ if (HasSSE2 && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ // movl and movlp will both match v2i64, but v2i64 is never matched by
+ // movl earlier because we make it strict to avoid messing with the movlp load
+ // folding logic (see the code above getMOVLP call). Match it here then,
+ // this is horrible, but will stay like this until we move all shuffle
+ // matching to x86 specific nodes. Note that for the 1st condition all
+ // types are matched with movsd.
+ if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ else if (HasSSE2)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKLPS;
+ case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v16i8: return X86ISD::PUNPCKLBW;
+ case MVT::v8i16: return X86ISD::PUNPCKLWD;
+ default:
+ llvm_unreachable("Unknow type for unpckl");
+ }
+ return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKHPS;
+ case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v16i8: return X86ISD::PUNPCKHBW;
+ case MVT::v8i16: return X86ISD::PUNPCKHWD;
+ default:
+ llvm_unreachable("Unknow type for unpckh");
+ }
+ return 0;
+}
+
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -4710,6 +5260,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
if (isZeroShuffle(SVOp))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
@@ -4718,7 +5272,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (SVOp->isSplat()) {
if (isMMX || NumElems < 4)
return Op;
- return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+ return PromoteSplat(SVOp, DAG);
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
@@ -4746,8 +5300,35 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (X86::isPSHUFDMask(SVOp))
- return Op;
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isPSHUFDMask(SVOp)) {
+ // The actual implementation will match the mask in the if above and then
+ // during isel it can match several different instructions, not only pshufd
+ // as its name says, sad but true, emulate the behavior for now...
+ if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
+ TargetMask, DAG);
+
+ if (VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
+ TargetMask, DAG);
+ }
// Check if this can be converted into a logical shift.
bool isLeft = false;
@@ -4768,17 +5349,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!isMMX)
- return Op;
+ if (!isMMX && !X86::isMOVLPMask(SVOp)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
}
// FIXME: fold these into legal mask.
- if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
- X86::isMOVSLDUPMask(SVOp) ||
- X86::isMOVHLPSMask(SVOp) ||
- X86::isMOVLHPSMask(SVOp) ||
- X86::isMOVLPMask(SVOp)))
- return Op;
+ if (!isMMX) {
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
+ }
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -4818,11 +5414,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
- X86::isUNPCKH_v_undef_Mask(SVOp) ||
- X86::isUNPCKLMask(SVOp) ||
- X86::isUNPCKHMask(SVOp))
- return Op;
+ if (X86::isUNPCKLMask(SVOp))
+ return (isMMX) ?
+ Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+
+ if (X86::isUNPCKHMask(SVOp))
+ return (isMMX) ?
+ Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -4844,11 +5442,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// FIXME: this seems wrong.
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
- if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKLMask(NewSVOp) ||
- X86::isUNPCKHMask(NewSVOp))
- return NewOp;
+
+ if (X86::isUNPCKLMask(NewSVOp))
+ return (isMMX) ?
+ NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+
+ if (X86::isUNPCKHMask(NewSVOp))
+ return (isMMX) ?
+ NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
}
// FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
@@ -4857,15 +5458,52 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
return CommuteVectorShuffle(SVOp, DAG);
- // Check for legal shuffle and return?
- SmallVector<int, 16> PermMask;
- SVOp->getMask(PermMask);
- if (isShuffleMaskLegal(PermMask, VT))
+ // The checks below are all present in isShuffleMaskLegal, but they are
+ // inlined here right now to enable us to directly emit target specific
+ // nodes, and remove one by one until they don't return Op anymore.
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+
+ // Very little shuffling can be done for 64-bit vectors right now.
+ if (VT.getSizeInBits() == 64)
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue();
+
+ // FIXME: pshufb, blends, shifts.
+ if (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
return Op;
+ if (isPSHUFHWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ X86::getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ X86::getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT)) {
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (VT == MVT::v4f32 || VT == MVT::v4i32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
+ TargetMask, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
+ TargetMask, DAG);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
if (NewOp.getNode())
return NewOp;
}
@@ -6922,24 +7560,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // ptest intrinsics. The intrinsic these come from are designed to return
- // an integer value, not just an instruction so lower it to the ptest
- // pattern and a setcc for the result.
+ // ptest and testp intrinsics. The intrinsic these come from are designed to
+ // return an integer value, not just an instruction so lower it to the ptest
+ // or testp pattern and a setcc for the result.
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
- case Intrinsic::x86_sse41_ptestnzc:{
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestz_256:
+ case Intrinsic::x86_avx_ptestc_256:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256: {
+ bool IsTestPacked = false;
unsigned X86CC = 0;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
X86CC = X86::COND_E;
break;
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_avx_ptestc_256:
// CF = 1
X86CC = X86::COND_B;
break;
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
X86CC = X86::COND_A;
break;
@@ -6947,7 +7619,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+ unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+ SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
@@ -7110,12 +7783,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Handler = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
- getPointerTy());
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
- SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(-TD->getPointerSize()));
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(TD->getPointerSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
@@ -7218,7 +7892,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- report_fatal_error("Nest register in use - reduce number of inreg parameters!");
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
}
}
break;
@@ -7439,6 +8114,86 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
+SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+
+ LLVMContext *Context = DAG.getContext();
+
+ assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later");
+
+ if (VT == MVT::v4i32) {
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+
+ ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
+
+ std::vector<Constant*> CV(4, CI);
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
+ if (VT == MVT::v16i8) {
+ // a = a << 5;
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(5, MVT::i32));
+
+ ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
+ ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
+
+ std::vector<Constant*> CVM1(16, CM1);
+ std::vector<Constant*> CVM2(16, CM2);
+ Constant *C = ConstantVector::get(CVM1);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)15, 4), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(4, MVT::i32));
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ C = ConstantVector::get(CVM2);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(2, MVT::i32));
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ // return pblendv(r, r+r, a);
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+ return R;
+ }
+ return SDValue();
+}
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
@@ -7508,6 +8263,50 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return Sum;
}
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Subtarget->hasSSE2()) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0,
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if (!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+}
+
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -7597,6 +8396,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -7640,6 +8440,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SHL: return LowerSHL(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -7852,6 +8653,40 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
+ case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
+ case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
+ case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
+ case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
+ case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
+ case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
+ case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
+ case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
+ case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
+ case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
+ case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
+ case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
+ case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
+ case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
+ case X86ISD::MOVSD: return "X86ISD::MOVSD";
+ case X86ISD::MOVSS: return "X86ISD::MOVSS";
+ case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
+ case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
+ case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
+ case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
+ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
+ case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
+ case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
+ case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
+ case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
+ case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
}
@@ -7863,6 +8698,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
+ Reloc::Model R = getTargetMachine().getRelocationModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
@@ -7882,7 +8718,8 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return false;
// If lower 4G is not available, then we must use rip-relative addressing.
- if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ if ((M != CodeModel::Small || R != Reloc::Static) &&
+ Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
@@ -8368,19 +9205,31 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
-// all of this code can be replaced with that in the .td file.
+// or XMM0_V32I8 in AVX all of this code can be replaced with that
+// in the .td file.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
+ assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ "Target must have SSE4.2 or AVX features enabled");
+
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
unsigned Opc;
- if (memArg)
- Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
- else
- Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+
+ if (!Subtarget->hasAVX()) {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+ } else {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
+ }
MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
@@ -8562,7 +9411,8 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::ESP, RegState::Define | RegState::Implicit);
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -8579,6 +9429,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
+ bool IsWin64 = Subtarget->isTargetWin64();
assert(MI->getOperand(3).isGlobal() && "This should be a global");
@@ -8590,7 +9441,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m));
addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
@@ -8727,12 +9578,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
// Atomic Lowering.
@@ -8966,21 +9821,20 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (VT.getSizeInBits() != 128)
return SDValue();
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- Elts.push_back(DAG.getShuffleScalarElt(SVN, i));
-
+ Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
-/// PerformShuffleCombine - Detect vector gather/scatter index generation
-/// and convert it from being a bunch of shuffles and extracts to a simple
-/// store and scalar loads to extract the elements.
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue InputVector = N->getOperand(0);
@@ -9030,8 +9884,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
+ 0, false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -9045,11 +9899,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr);
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
+ OffsetVal, StackPtr);
// Load the scalar.
- SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr,
- NULL, 0, false, false, 0);
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, NULL, 0, false, false, 0);
// Replace the exact with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -9087,8 +9942,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
@@ -9126,8 +9980,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
@@ -9156,8 +10009,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
@@ -9182,8 +10034,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
@@ -9193,8 +10044,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
@@ -9905,7 +10755,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
@@ -9922,6 +10771,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SHUFPS: // Handle all target specific shuffles
+ case X86ISD::SHUFPD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
}
return SDValue();
@@ -9956,14 +10827,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
}
}
-static bool MayFoldLoad(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
-}
-
-static bool MayFoldIntoStore(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
-}
-
/// IsDesirableToPromoteOp - This method query the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4e4daa4bc5ca..d2d9b28a0396 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -248,6 +248,44 @@ namespace llvm {
// PTEST - Vector bitwise comparisons
PTEST,
+ // TESTP - Vector packed fp sign bitwise comparisons
+ TESTP,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ PALIGN,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ PSHUFHW_LD,
+ PSHUFLW_LD,
+ SHUFPD,
+ SHUFPS,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVSHDUP_LD,
+ MOVSLDUP_LD,
+ MOVLHPS,
+ MOVLHPD,
+ MOVHLPS,
+ MOVHLPD,
+ MOVLPS,
+ MOVLPD,
+ MOVSD,
+ MOVSS,
+ UNPCKLPS,
+ UNPCKLPD,
+ UNPCKHPS,
+ UNPCKHPD,
+ PUNPCKLBW,
+ PUNPCKLWD,
+ PUNPCKLDQ,
+ PUNPCKLQDQ,
+ PUNPCKHBW,
+ PUNPCKHWD,
+ PUNPCKHDQ,
+ PUNPCKHQDQ,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -265,7 +303,13 @@ namespace llvm {
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
- ATOMSWAP64_DAG
+ ATOMSWAP64_DAG,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -584,12 +628,19 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
/// appropriate.
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -710,11 +761,16 @@ namespace llvm {
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+
+ // Utility functions to help LowerVECTOR_SHUFFLE
+ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 42d0e7f9778a..0884b61425e9 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -73,11 +73,7 @@ def GetLo32XForm : SDNodeXForm<imm, [{
return getI32Imm((unsigned)N->getZExtValue());
}]>;
-def i64immSExt32 : PatLeaf<(i64 imm), [{
- // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // sign extended field.
- return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
-}]>;
+def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
def i64immZExt32 : PatLeaf<(i64 imm), [{
@@ -158,7 +154,7 @@ let isCall = 1 in
// FIXME: We need to teach codegen about single list of call-clobbered
// registers.
-let isCall = 1 in
+let isCall = 1, isCodeGenOnly = 1 in
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
@@ -168,7 +164,7 @@ let isCall = 1 in
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
Uses = [RSP] in {
- def WINCALL64pcrel32 : I<0xE8, RawFrm,
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call\t$dst", []>,
Requires<[IsWin64]>;
@@ -182,7 +178,8 @@ let isCall = 1 in
}
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
@@ -216,9 +213,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp{q}\t$dst", []>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)]>;
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))]>;
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
"ljmp{q}\t{*}$dst", []>;
}
@@ -246,7 +243,7 @@ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>;
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
def POP64r : I<0x58, AddRegFrm,
@@ -330,7 +327,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
// Fast system-call instructions
def SYSEXIT64 : RI<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB;
+ (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Move Instructions...
@@ -374,6 +371,7 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store i64immSExt32:$src, addr:$dst)]>;
/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
@@ -388,7 +386,13 @@ let mayStore = 1 in
def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[]>;
+}
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{q}\t{$src, %rax|%rax, $src}", []>;
def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
@@ -397,6 +401,7 @@ def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{q}\t{%rax, $dst|$dst, %rax}", []>;
def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
"mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
// Moves to and from segment registers
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
@@ -1316,14 +1321,13 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
[]
>, TB;
-def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
- REX_W;
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
-def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))]>, TB;
@@ -1537,116 +1541,6 @@ def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
//===----------------------------------------------------------------------===//
-// Conversion Instructions...
-//
-
-// f64 -> signed i64
-def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
-def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
-def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvtsd2si64 VR128:$src))]>;
-def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst),
- (ins f128mem:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (int_x86_sse2_cvtsd2si64
- (load addr:$src)))]>;
-def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
-def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvttsd2si64 VR128:$src))]>;
-def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst),
- (ins f128mem:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvttsd2si64
- (load addr:$src)))]>;
-
-// Signed i64 -> f64
-def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
-def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
-def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtsi642sd VR128:$src1,
- GR64:$src2))]>;
-def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtsi642sd VR128:$src1,
- (loadi64 addr:$src2)))]>;
-} // Constraints = "$src1 = $dst"
-
-// Signed i64 -> f32
-def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
-def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
- def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- GR64:$src2))]>;
- def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- (loadi64 addr:$src2)))]>;
-} // Constraints = "$src1 = $dst"
-
-// f32 -> signed i64
-def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
-def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
-def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvtss2si64 VR128:$src))]>;
-def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (int_x86_sse_cvtss2si64
- (load addr:$src)))]>;
-def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
-def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
-def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvttss2si64 VR128:$src))]>;
-def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst),
- (ins f32mem:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvttss2si64 (load addr:$src)))]>;
-
// Descriptor-table support instructions
// LLDT is not interpreted specially in 64-bit mode because there is no sign
@@ -1726,6 +1620,14 @@ def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
// Atomic Instructions
//===----------------------------------------------------------------------===//
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP] in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
let Defs = [RAX, EFLAGS], Uses = [RAX] in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
@@ -1772,7 +1674,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
// Optimized codegen when the non-memory output is not used.
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"lock\n\t"
"add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
new file mode 100644
index 000000000000..d868773d2d69
--- /dev/null
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -0,0 +1,60 @@
+//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes FMA (Fused Multiply-Add) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FMA3 - Intel 3 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+
+multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+}
+
+let isAsmParserOnly = 1 in {
+ // Fused Multiply-Add
+ defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+ defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+ defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+
+ // Fused Negative Multiply-Add
+ defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+ defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index da93de988d50..9c9bcc7d0b6a 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -108,10 +108,6 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
-let isTerminator = 1 in
- let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
- def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>;
-
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32"
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
@@ -157,7 +153,7 @@ def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
}
-// FpIf32, FpIf64 - Floating Point Psuedo Instruction template.
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index cc3fdf1efd7b..79187e9a76d7 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -39,6 +39,7 @@ def MRM_E8 : Format<39>;
def MRM_F0 : Format<40>;
def MRM_F8 : Format<41>;
def MRM_F9 : Format<42>;
+def RawFrmImm16 : Format<43>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -210,7 +211,7 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
-// FpI_ - Floating Point Psuedo Instruction template. Not Predicated.
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
let FPForm = fp;
@@ -224,13 +225,13 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -411,6 +412,20 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasSSE42]>;
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8 and OpSize prefix.
+// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX]>;
+
// AES Instruction Templates:
//
// AES8I
@@ -425,6 +440,18 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasAES]>;
+// CLMUL Instruction Templates
+class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ OpSize, VEX_4V, Requires<[HasFMA3]>;
+
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 71c4e8bc147f..01149b699213 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -117,9 +117,67 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, v4f32>,
- SDTCisVT<2, v4f32>]>;
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>;
+def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -148,12 +206,13 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
+// 128-bit load pattern fragments
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit load pattern fragments
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
@@ -174,6 +233,8 @@ def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
@@ -183,7 +244,7 @@ def alignedloadv4i32 : PatFrag<(ops node:$ptr),
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit aligned load pattern fragments
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
@@ -206,15 +267,20 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit memop pattern fragments
+def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
@@ -254,6 +320,7 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
+// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
@@ -261,6 +328,9 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+// 256-bit bitconvert pattern fragments
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+
def vzmovl_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ce471eadd78b..5280940cf437 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -235,6 +235,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::BT64ri8, X86::BT64mi8, 1, 0 },
{ X86::CALL32r, X86::CALL32m, 1, 0 },
{ X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
{ X86::CMP16ri, X86::CMP16mi, 1, 0 },
{ X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
{ X86::CMP16rr, X86::CMP16mr, 1, 0 },
@@ -667,46 +668,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}
-bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case X86::MOV8rr:
- case X86::MOV8rr_NOREX:
- case X86::MOV16rr:
- case X86::MOV32rr:
- case X86::MOV64rr:
- case X86::MOV32rr_TC:
- case X86::MOV64rr_TC:
-
- // FP Stack register class copies
- case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
- case X86::MOV_Fp3264: case X86::MOV_Fp3280:
- case X86::MOV_Fp6432: case X86::MOV_Fp8032:
-
- // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
- // copies are done with FsMOVAPSrr and FsMOVAPDrr.
-
- case X86::FsMOVAPSrr:
- case X86::FsMOVAPDrr:
- case X86::MOVAPSrr:
- case X86::MOVAPDrr:
- case X86::MOVDQArr:
- case X86::MMX_MOVQ64rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
-}
-
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -827,7 +788,7 @@ static bool isFrameStoreOpcode(int Opcode) {
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 1, FrameIndex))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
return MI->getOperand(0).getReg();
return 0;
}
@@ -866,7 +827,8 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 0, FrameIndex))
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
return MI->getOperand(X86::AddrNumOperands).getReg();
return 0;
}
@@ -1664,14 +1626,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
-static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
- const X86InstrInfo &TII) {
- if (MI->getOpcode() == X86::FP_REG_KILL)
- return false;
- return TII.isUnpredicatedTerminator(MI);
-}
-
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -1688,7 +1642,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
- if (!isBrAnalysisUnpredicatedTerminator(I, *this))
+ if (!isUnpredicatedTerminator(I))
break;
// A terminator that isn't a branch can't easily be handled by this
@@ -1891,6 +1845,33 @@ static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
+// Try and copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ return 0;
+}
+
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -1915,6 +1896,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = X86::MOVAPSrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -2046,6 +2029,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2130,8 +2115,9 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- &X86::VR128RegClass, &RI);
+ RC, &RI);
}
}
@@ -2161,8 +2147,9 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- &X86::VR128RegClass, &RI);
+ RC, &RI);
}
}
return true;
@@ -2423,10 +2410,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
Alignment = 16;
break;
case X86::FsFLD0SD:
@@ -2453,12 +2447,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
} else if (Ops.size() != 1)
return NULL;
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
@@ -2485,10 +2489,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
- if (LoadMI->getOpcode() == X86::FsFLD0SS)
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
@@ -2991,561 +2998,6 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
return false;
}
-
-/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
-/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
-/// size, and 3) use of X86-64 extended registers.
-unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
- unsigned REX = 0;
- const TargetInstrDesc &Desc = MI.getDesc();
-
- // Pseudo instructions do not need REX prefix byte.
- if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
- if (Desc.TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
- unsigned NumOps = Desc.getNumOperands();
- if (NumOps) {
- bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- unsigned i = isTwoAddr ? 1 : 0;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (isX86_64NonExtLowByteReg(Reg))
- REX |= 0x40;
- }
- }
-
- switch (Desc.TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg:
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= (1 << 0) | (1 << 2);
- break;
- case X86II::MRMSrcReg: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 0;
- }
- break;
- }
- case X86II::MRMSrcMem: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- i = isTwoAddr ? 2 : 1;
- for (; i != NumOps; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
- i = isTwoAddr ? 1 : 0;
- if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- for (; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- default: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 0;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 2;
- }
- break;
- }
- }
- }
- return REX;
-}
-
-/// sizePCRelativeBlockAddress - This method returns the size of a PC
-/// relative block address instruction
-///
-static unsigned sizePCRelativeBlockAddress() {
- return 4;
-}
-
-/// sizeGlobalAddress - Give the size of the emission of this global address
-///
-static unsigned sizeGlobalAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeConstPoolAddress - Give the size of the emission of this constant
-/// pool address
-///
-static unsigned sizeConstPoolAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeExternalSymbolAddress - Give the size of the emission of this external
-/// symbol
-///
-static unsigned sizeExternalSymbolAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeJumpTableAddress - Give the size of the emission of this jump
-/// table address
-///
-static unsigned sizeJumpTableAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-static unsigned sizeConstant(unsigned Size) {
- return Size;
-}
-
-static unsigned sizeRegModRMByte(){
- return 1;
-}
-
-static unsigned sizeSIBByte(){
- return 1;
-}
-
-static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
- unsigned FinalSize = 0;
- // If this is a simple integer displacement that doesn't require a relocation.
- if (!RelocOp) {
- FinalSize += sizeConstant(4);
- return FinalSize;
- }
-
- // Otherwise, this is something that requires a relocation.
- if (RelocOp->isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (RelocOp->isCPI()) {
- FinalSize += sizeConstPoolAddress(false);
- } else if (RelocOp->isJTI()) {
- FinalSize += sizeJumpTableAddress(false);
- } else {
- llvm_unreachable("Unknown value to relocate!");
- }
- return FinalSize;
-}
-
-static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
- bool IsPIC, bool Is64BitMode) {
- const MachineOperand &Op3 = MI.getOperand(Op+3);
- int DispVal = 0;
- const MachineOperand *DispForReloc = 0;
- unsigned FinalSize = 0;
-
- // Figure out what sort of displacement we have to handle here.
- if (Op3.isGlobal()) {
- DispForReloc = &Op3;
- } else if (Op3.isCPI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else if (Op3.isJTI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else {
- DispVal = 1;
- }
-
- const MachineOperand &Base = MI.getOperand(Op);
- const MachineOperand &IndexReg = MI.getOperand(Op+2);
-
- unsigned BaseReg = Base.getReg();
-
- // Is a SIB byte needed?
- if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
- IndexReg.getReg() == 0 &&
- (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
- if (BaseReg == 0) { // Just a displacement?
- // Emit special case [disp32] encoding
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- } else {
- unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
- if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
- // Emit simple indirect register encoding... [EAX] f.e.
- ++FinalSize;
- // Be pessimistic and assume it's a disp32, not a disp8
- } else {
- // Emit the most general non-SIB encoding: [REG+disp32]
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
-
- } else { // We need a SIB byte, so start by outputting the ModR/M byte first
- assert(IndexReg.getReg() != X86::ESP &&
- IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
- bool ForceDisp32 = false;
- if (BaseReg == 0 || DispForReloc) {
- // Emit the normal disp32 encoding.
- ++FinalSize;
- ForceDisp32 = true;
- } else {
- ++FinalSize;
- }
-
- FinalSize += sizeSIBByte();
-
- // Do we need to output a displacement?
- if (DispVal != 0 || ForceDisp32) {
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
- return FinalSize;
-}
-
-
-static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
- const TargetInstrDesc *Desc,
- bool IsPIC, bool Is64BitMode) {
-
- unsigned Opcode = Desc->Opcode;
- unsigned FinalSize = 0;
-
- // Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK) ++FinalSize;
-
- // Emit segment override opcode prefix as needed.
- switch (Desc->TSFlags & X86II::SegOvrMask) {
- case X86II::FS:
- case X86II::GS:
- ++FinalSize;
- break;
- default: llvm_unreachable("Invalid segment!");
- case 0: break; // No segment override!
- }
-
- // Emit the repeat opcode prefix as needed.
- if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;
-
- // Emit the operand size opcode prefix as needed.
- if (Desc->TSFlags & X86II::OpSize) ++FinalSize;
-
- // Emit the address size opcode prefix as needed.
- if (Desc->TSFlags & X86II::AdSize) ++FinalSize;
-
- bool Need0FPrefix = false;
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::TB: // Two-byte opcode prefix
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- Need0FPrefix = true;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::REP: break; // already handled.
- case X86II::XS: // F3 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::XD: // F2 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
- case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
- ++FinalSize;
- break; // Two-byte opcode prefix
- default: llvm_unreachable("Invalid prefix!");
- case 0: break; // No prefix!
- }
-
- if (Is64BitMode) {
- // REX prefix
- unsigned REX = X86InstrInfo::determineREX(MI);
- if (REX)
- ++FinalSize;
- }
-
- // 0x0F escape code must be emitted just before the opcode.
- if (Need0FPrefix)
- ++FinalSize;
-
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::T8: // 0F 38
- ++FinalSize;
- break;
- case X86II::TA: // 0F 3A
- ++FinalSize;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- break;
- }
-
- // If this is a two-address instruction, skip one of the register operands.
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
- CurOp++;
- else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
- // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
- --NumOps;
-
- switch (Desc->TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
- case X86II::Pseudo:
- // Remember the current PC offset, this is the PIC relocation
- // base address.
- switch (Opcode) {
- default:
- break;
- case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
- FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
- *MF->getTarget().getMCAsmInfo());
- break;
- }
- case TargetOpcode::DBG_LABEL:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::DBG_VALUE:
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case X86::FP_REG_KILL:
- break;
- case X86::MOVPC32r: {
- // This emits the "call" portion of this pseudo instruction.
- ++FinalSize;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- break;
- }
- }
- CurOp = NumOps;
- break;
- case X86II::RawFrm:
- ++FinalSize;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- if (MO.isMBB()) {
- FinalSize += sizePCRelativeBlockAddress();
- } else if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (MO.isSymbol()) {
- FinalSize += sizeExternalSymbolAddress(false);
- } else if (MO.isImm()) {
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- } else {
- llvm_unreachable("Unknown RawFrm operand!");
- }
- }
- break;
-
- case X86II::AddRegFrm:
- ++FinalSize;
- ++CurOp;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRMDestReg: {
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
- case X86II::MRMDestMem: {
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
-
- case X86II::MRMSrcReg:
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
-
- case X86II::MRMSrcMem: {
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
-
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- ++FinalSize;
- if (Desc->getOpcode() == X86::LFENCE ||
- Desc->getOpcode() == X86::MFENCE) {
- // Special handling of lfence and mfence;
- FinalSize += sizeRegModRMByte();
- } else if (Desc->getOpcode() == X86::MONITOR ||
- Desc->getOpcode() == X86::MWAIT) {
- // Special handling of monitor and mwait.
- FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
- } else {
- ++CurOp;
- FinalSize += sizeRegModRMByte();
- }
-
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri32)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
-
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64mi32)
- dword = true;
- if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRM_C1:
- case X86II::MRM_C8:
- case X86II::MRM_C9:
- case X86II::MRM_E8:
- case X86II::MRM_F0:
- FinalSize += 2;
- break;
- }
-
- case X86II::MRMInitReg:
- ++FinalSize;
- // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
- FinalSize += sizeRegModRMByte();
- ++CurOp;
- break;
- }
-
- if (!Desc->isVariadic() && CurOp != NumOps) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Cannot determine size: " << MI;
- report_fatal_error(Msg.str());
- }
-
-
- return FinalSize;
-}
-
-
-unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const TargetInstrDesc &Desc = MI->getDesc();
- bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
- bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
- unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
- if (Desc.getOpcode() == X86::MOVPC32r)
- Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
- return Size;
-}
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
@@ -3573,7 +3025,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
static const unsigned ReplaceableInstrs[][3] = {
- //PackedInt PackedSingle PackedDouble
+ //PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
@@ -3589,6 +3041,22 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
@@ -3627,7 +3095,7 @@ namespace {
/// global base register for x86-32.
struct CGBR : public MachineFunctionPass {
static char ID;
- CGBR() : MachineFunctionPass(&ID) {}
+ CGBR() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index ad0217adb475..f33620641e88 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -311,6 +311,12 @@ namespace X86II {
MRM_F0 = 40,
MRM_F8 = 41,
MRM_F9 = 42,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16 or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16
+ RawFrmImm16 = 43,
FormMask = 63,
@@ -439,27 +445,27 @@ namespace X86II {
//===------------------------------------------------------------------===//
// VEX - The opcode prefix used by AVX instructions
- VEX = 1ULL << 32,
+ VEX = 1U << 0,
// VEX_W - Has a opcode specific functionality, but is used in the same
// way as REX_W is for regular SSE instructions.
- VEX_W = 1ULL << 33,
+ VEX_W = 1U << 1,
// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
// address instructions in SSE are represented as 3 address ones in AVX
// and the additional register is encoded in VEX_VVVV prefix.
- VEX_4V = 1ULL << 34,
+ VEX_4V = 1U << 2,
// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
// must be encoded in the i8 immediate field. This usually happens in
// instructions with 4 operands.
- VEX_I8IMM = 1ULL << 35,
+ VEX_I8IMM = 1U << 3,
// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
// instruction uses 256-bit wide registers. This is usually auto detected if
// a VR256 register is used, but some AVX instructions also have this field
// marked when using a f256 memory references.
- VEX_L = 1ULL << 36
+ VEX_L = 1U << 4
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -522,11 +528,12 @@ namespace X86II {
case X86II::AddRegFrm:
case X86II::MRMDestReg:
case X86II::MRMSrcReg:
+ case X86II::RawFrmImm16:
return -1;
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem: {
- bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
@@ -610,12 +617,6 @@ public:
///
virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
/// extension instruction. That is, it's like a copy where it's legal for the
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
@@ -826,16 +827,11 @@ public:
if (!MO.isReg()) return false;
return isX86_64ExtendedReg(MO.getReg());
}
- static unsigned determineREX(const MachineInstr &MI);
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
/// higher) register? e.g. r8, xmm8, xmm13, etc.
static bool isX86_64ExtendedReg(unsigned RegNo);
- /// GetInstSize - Returns the size of the specified MachineInstr.
- ///
- virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 1efef5a80b1b..09b7721a621d 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -80,6 +80,21 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+ [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
@@ -222,7 +237,7 @@ def i16mem : X86MemOperand<"printi16mem">;
def i32mem : X86MemOperand<"printi32mem">;
def i64mem : X86MemOperand<"printi64mem">;
def i128mem : X86MemOperand<"printi128mem">;
-//def i256mem : X86MemOperand<"printi256mem">;
+def i256mem : X86MemOperand<"printi256mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
@@ -333,15 +348,21 @@ def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
-def HasMMX : Predicate<"Subtarget->hasMMX()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+
+// FIXME: temporary hack to let codegen assert or generate poor code in case
+// no AVX version of the desired intructions is present, this is better for
+// incremental dev (without fallbacks it's easier to spot what's missing)
+def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+
def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
@@ -393,9 +414,7 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def immSext8 : PatLeaf<(imm), [{
- return N->getSExtValue() == (int8_t)N->getSExtValue();
-}]>;
+def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
def i16immSExt8 : PatLeaf<(i16 immSext8)>;
def i32immSExt8 : PatLeaf<(i32 immSext8)>;
@@ -559,9 +578,10 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
// The main point of having separate instruction are extra unmodelled effects
// (compared to ordinary calls) like stack pointer change.
-def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
- "# dynamic stack allocation",
- [(X86MingwAlloca)]>;
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86MingwAlloca)]>;
}
// Nop
@@ -574,10 +594,14 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
+let Uses = [EFLAGS] in {
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+}
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
@@ -650,16 +674,16 @@ let Uses = [ECX], isBranch = 1, isTerminator = 1 in
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>;
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>;
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
- def FARJMP16i : Iseg16<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "ljmp{w}\t$seg, $off", []>, OpSize;
- def FARJMP32i : Iseg32<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "ljmp{l}\t$seg, $off", []>;
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize;
@@ -670,9 +694,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// Loop instructions
-def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -695,12 +719,12 @@ let isCall = 1 in
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
"call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
- def FARCALL16i : Iseg16<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "lcall{w}\t$seg, $off", []>, OpSize;
- def FARCALL32i : Iseg32<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "lcall{l}\t$seg, $off", []>;
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize;
@@ -721,7 +745,8 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
// Tail call stuff.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
@@ -756,7 +781,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
//
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>;
+ (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
@@ -934,7 +959,7 @@ def SYSRET : I<0x07, RawFrm,
def SYSENTER : I<0x34, RawFrm,
(outs), (ins), "sysenter", []>, TB;
def SYSEXIT : I<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB;
+ (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
@@ -1025,17 +1050,23 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is a
/// 32-bit offset from the PC. These are only valid in x86-32 mode.
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|%al, $src}", []>;
+ "mov{b}\t{$src, %al|%al, $src}", []>,
+ Requires<[In32BitMode]>;
def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|%eax, $src}", []>;
+ "mov{l}\t{$src, %eax|%eax, $src}", []>,
+ Requires<[In32BitMode]>;
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, %al}", []>;
+ "mov{b}\t{%al, $dst|$dst, %al}", []>,
+ Requires<[In32BitMode]>;
def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
+ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize,
+ Requires<[In32BitMode]>;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, %eax}", []>;
+ "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
+ Requires<[In32BitMode]>;
// Moves to and from segment registers
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
@@ -1087,6 +1118,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
[(store GR32:$src, addr:$dst)]>;
/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
@@ -1101,10 +1133,12 @@ let mayStore = 1 in
def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[]>;
+}
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
// encoded when a REX prefix is present.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
@@ -1118,6 +1152,7 @@ let mayLoad = 1,
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+}
// Moves to and from debug registers
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
@@ -1137,7 +1172,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
// Extra precision multiplication
-// AL is really implied by AX, by the registers in Defs must match the
+// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
@@ -3895,6 +3930,20 @@ def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
// Atomic support
//
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
+
+let hasSideEffects = 1 in {
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+}
+
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
let Constraints = "$val = $dst" in {
@@ -4928,6 +4977,12 @@ include "X86Instr64bit.td"
include "X86InstrFragmentsSIMD.td"
//===----------------------------------------------------------------------===//
+// FMA - Fused Multiply-Add support (requires FMA)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFMA.td"
+
+//===----------------------------------------------------------------------===//
// XMM Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 6cf7ac83620e..11d4179534dc 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -164,7 +164,7 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
"movdq2q\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ebe161b46bdc..f5466f83f519 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -142,7 +142,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
!strconcat(SSEVer, !strconcat("_",
!strconcat(OpcodeStr, FPSizeStr))))
RC:$src1, RC:$src2))], d>;
@@ -150,7 +150,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
!strconcat(SSEVer, !strconcat("_",
!strconcat(OpcodeStr, FPSizeStr))))
RC:$src1, (mem_frag addr:$src2)))], d>;
@@ -256,10 +256,10 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
let isAsmParserOnly = 1 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+ [(store FR32:$src, addr:$dst)]>, XS, VEX;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+ [(store FR64:$src, addr:$dst)]>, XD, VEX;
}
// Extract and store.
@@ -340,6 +340,15 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
}
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -516,6 +525,14 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
}
+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ []>;
+}
+
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d> {
@@ -526,35 +543,58 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
+ X86MemOperand x86memop, string asm> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- asm, []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src), asm, []>;
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
let isAsmParserOnly = 1 in {
-defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
- VEX_4V;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
- VEX_4V;
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX, VEX_W;
+
+// The assembler can recognize rr 64-bit instructions by seeing a rxx
+// register, but the same isn't true when only using memory operands,
+// provide other assembly "l" and "q" forms to address this explicitly
+// where appropriate to do so.
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
+ VEX_4V;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
+ VEX_4V, VEX_W;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
+ VEX_4V;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
+ VEX_4V;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
+ VEX_4V, VEX_W;
}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
"cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
"cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -570,10 +610,12 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))]>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -588,35 +630,79 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm> {
+ PatFrag ld_frag, string asm, bit Is2Addr = 1> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src2), asm,
+ (ins DstRC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
let isAsmParserOnly = 1 in {
defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
- VEX;
+ f32mem, load, "cvtss2si">, XS, VEX;
+ defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
- VEX;
+ f128mem, load, "cvtsd2si">, XD, VEX;
+ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+ // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+ // Get rid of this hack or rename the intrinsics, there are several
+ // intructions that only match with the intrinsic form, why create duplicates
+ // to let them be recognized by the assembler?
+ defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+ defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
}
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+ f32mem, load, "cvtss2si">, XS;
+defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ f32mem, load, "cvtss2si{q}">, XS, REX_W;
defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
+ f128mem, load, "cvtsd2si">, XD;
+defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
+ REX_W;
+
+let isAsmParserOnly = 1 in {
+ defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+ defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+ defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
+}
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ "cvtsi2ss">, XS;
+ defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64,
+ "cvtsi2ss{q}">, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
+ "cvtsi2sd">, XD;
+ defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+ "cvtsi2sd">, XD, REX_W;
}
// Instructions below don't have an AVX form.
@@ -645,35 +731,48 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
-let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
- int_x86_sse_cvttss2si, f32mem, load,
- "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
-defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
- int_x86_sse2_cvttsd2si, f128mem, load,
- "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
+let isAsmParserOnly = 1 in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si">, XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttss2si">, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttss2si">, XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
- XS;
+ f32mem, load, "cvttss2si">, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si{q}">, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
- XD;
+ f128mem, load, "cvttss2si">, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse2_cvttsd2si64, f128mem, load,
+ "cvttss2si{q}">, XD, REX_W;
let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+ "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
}
@@ -701,13 +800,11 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load,
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
- XS, VEX_4V;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load,
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
// Convert scalar single to scalar double
let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
@@ -806,6 +903,7 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(bitconvert (memopv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+
// Convert packed single/double fp to doubleword
let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -964,11 +1062,11 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in {
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
VEX, Requires<[HasAVX]>;
@@ -1029,6 +1127,39 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
+// whenever possible to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+ (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTTPS2DQYrm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
@@ -1193,16 +1324,14 @@ let isAsmParserOnly = 1 in {
"cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
SSEPackedDouble>, OpSize, VEX_4V;
- let Pattern = []<dag> in {
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- }
+ defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -1232,24 +1361,30 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
Domain d, bit IsConvertibleToThreeAddress = 0> {
- def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
- [(set VR128:$dst, (vt (shufp:$src3
- VR128:$src1, (mem_frag addr:$src2))))], d>;
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (shufp:$src3
+ RC:$src1, (mem_frag addr:$src2))))], d>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
- def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
- [(set VR128:$dst,
- (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst,
+ (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
}
let isAsmParserOnly = 1 in {
- defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, VEX_4V;
- defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+ defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -1351,12 +1486,23 @@ let isAsmParserOnly = 1 in {
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
"movmskpd", SSEPackedDouble>, OpSize,
VEX;
- // FIXME: merge with multiclass above when the intrinsics come.
- def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+
+ // Assembler Only
+ def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+ def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+ def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
+ VEX;
}
//===----------------------------------------------------------------------===//
@@ -1536,6 +1682,9 @@ let isCommutable = 0 in
///
/// These three forms can each be reg+reg or reg+mem.
///
+
+/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those
+/// classes below
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -1565,7 +1714,7 @@ multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
- bit Is2Addr = 1> {
+ bit Is2Addr = 1> {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
@@ -1573,37 +1722,57 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
- bit Is2Addr = 1> {
+ bit Is2Addr = 1> {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
+ !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
SSEPackedSingle, Is2Addr>, TB;
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64,
+ !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
SSEPackedDouble, Is2Addr>, TB, OpSize;
}
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+ SSEPackedSingle, 0>, TB;
+
+ defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+ SSEPackedDouble, 0>, TB, OpSize;
+}
+
// Binary Arithmetic instructions
let isAsmParserOnly = 1 in {
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
}
@@ -1668,20 +1837,20 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, XS, Requires<[HasAVX, OptForSize]>;
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -1715,6 +1884,16 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -1738,21 +1917,19 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int> {
- def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
- def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -1787,29 +1964,48 @@ multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
- sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
VEX_4V;
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
VEX;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
int_x86_sse_rsqrt_ss>, VEX_4V;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
- defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
VEX_4V;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}
// Square root.
@@ -1898,6 +2094,13 @@ let isAsmParserOnly = 1 in {
}
}
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+ (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+ (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
@@ -1961,11 +2164,14 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
TB, Requires<[HasSSE1]>;
+def : Pat<(X86SFence), (SFENCE)>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementatioan, it does not expand the instructions below like
+// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in {
def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
@@ -1977,6 +2183,26 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
+// The same as done above but for AVX. The 128-bit versions are the
+// same, but re-encoded. The 256-bit does not support PI version.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementatioan, it does not expand the instructions below like
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+let ExeDomain = SSEPackedInt in
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
@@ -2003,35 +2229,47 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//
+
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isAsmParserOnly = 1 in {
- let neverHasSideEffects = 1 in
- def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+ let neverHasSideEffects = 1 in {
+ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ }
+ def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+ def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
let canFoldAsLoad = 1, mayLoad = 1 in {
- def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
- VEX;
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
- XS, VEX, Requires<[HasAVX]>;
+ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ }
}
let mayStore = 1 in {
- def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
- def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
- XS, VEX, Requires<[HasAVX]>;
+ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ let Predicates = [HasAVX] in {
+ def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ }
}
}
@@ -2084,6 +2322,10 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
} // ExeDomain = SSEPackedInt
+def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
@@ -2376,6 +2618,25 @@ let ExeDomain = SSEPackedInt in {
}
} // Constraints = "$src1 = $dst"
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+ // Shift up / down and insert zero's.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
+
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
@@ -2662,11 +2923,16 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
imm:$src2))]>;
// Insert
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm PINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+ def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, OpSize, VEX_4V;
+}
let Constraints = "$src1 = $dst" in
- defm VPINSRW : sse2_pinsrw, TB, OpSize;
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
} // ExeDomain = SSEPackedInt
@@ -2676,10 +2942,13 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 1 in
-def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+let isAsmParserOnly = 1 in {
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
@@ -2939,18 +3208,20 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
// Instructions to match in the assembler
let isAsmParserOnly = 1 in {
-// This instructions is in fact an alias to movd with 64 bit dst
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+// Recognize "movd" with GR64 destination, but encode as a "movq"
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
}
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2970,19 +3241,14 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 1)), (MFENCE)>;
-
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
@@ -3027,13 +3293,13 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// Convert Packed DW Integers to Packed Double FP
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -3041,6 +3307,17 @@ def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE3 - Move Instructions
//===---------------------------------------------------------------------===//
@@ -3057,9 +3334,20 @@ def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memopv4f32 addr:$src), (undef)))]>;
}
+multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
+ string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+}
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
-defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
-defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
@@ -3076,15 +3364,31 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
(undef))))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
// Move Unaligned Integer
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
@@ -3125,35 +3429,39 @@ let AddedComplexity = 20 in
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
-multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
def rr : I<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (Int VR128:$src1,
- VR128:$src2))]>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
def rm : I<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (Int VR128:$src1,
- (memop addr:$src2)))]>;
-
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX],
ExeDomain = SSEPackedDouble in {
- defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
- VEX_4V;
- defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
- VEX_4V;
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, XD, VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, OpSize, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, XD, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
ExeDomain = SSEPackedDouble in {
- defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
- defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
+ f128mem>, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
+ f128mem>, TB, OpSize;
}
//===---------------------------------------------------------------------===//
@@ -3161,61 +3469,72 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
//===---------------------------------------------------------------------===//
// Horizontal ops
-class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
-class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
+multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
- def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
- def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
- def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
- def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
- def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
- def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
- def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ int_x86_avx_hsub_pd_256, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
- def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
- def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
- def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
- def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
- def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
- def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
- def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd>;
}
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
-multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_unop_rm_int_mm - Simple SSSE3 unary whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, Intrinsic IntId64> {
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>;
@@ -3224,7 +3543,11 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
+}
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3240,26 +3563,28 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
int_x86_ssse3_pabs_d_128>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
- int_x86_ssse3_pabs_d_128>;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>,
+ SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8,
+ int_x86_ssse3_pabs_b>;
+
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>,
+ SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16,
+ int_x86_ssse3_pabs_w>;
+
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>,
+ SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32,
+ int_x86_ssse3_pabs_d>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -3267,26 +3592,9 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128,
+ PatFrag mem_frag128, Intrinsic IntId128,
bit Is2Addr = 1> {
let isCommutable = 1 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-
- let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -3303,88 +3611,102 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, Intrinsic IntId64> {
+ let isCommutable = 1 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
- int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
- int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
- int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
- int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
- int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
- int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
- int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
- int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
- int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
- int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
- int_x86_ssse3_psign_d_128>;
-}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
- int_x86_ssse3_pmul_hr_sw_128>;
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128>,
+ SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16,
+ int_x86_ssse3_phadd_w>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128>,
+ SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32,
+ int_x86_ssse3_phadd_d>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128>,
+ SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16,
+ int_x86_ssse3_phadd_sw>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128>,
+ SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16,
+ int_x86_ssse3_phsub_w>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128>,
+ SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32,
+ int_x86_ssse3_phsub_d>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128>,
+ SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16,
+ int_x86_ssse3_phsub_sw>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128>,
+ SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8,
+ int_x86_ssse3_pmadd_ub_sw>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8,
+ int_x86_ssse3_pshuf_b_128>,
+ SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8,
+ int_x86_ssse3_pshuf_b>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ int_x86_ssse3_psign_b_128>,
+ SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8,
+ int_x86_ssse3_psign_b>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ int_x86_ssse3_psign_w_128>,
+ SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16,
+ int_x86_ssse3_psign_w>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ int_x86_ssse3_psign_d_128>,
+ SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32,
+ int_x86_ssse3_psign_d>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128>,
+ SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16,
+ int_x86_ssse3_pmul_hr_sw>;
}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
@@ -3396,22 +3718,16 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass sse3_palign<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palign_mm<string asm> {
def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+}
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
@@ -3429,9 +3745,10 @@ multiclass sse3_palign<string asm, bit Is2Addr = 1> {
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PALIGN : sse3_palign<"palignr">;
+ defm PALIGN : ssse3_palign<"palignr">,
+ ssse3_palign_mm<"palignr">;
let AddedComplexity = 5 in {
@@ -3732,31 +4049,62 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
(Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
-def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
-def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
-def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
@@ -3923,8 +4271,12 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
+}
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -4007,8 +4359,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+}
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
@@ -4131,80 +4488,84 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
+ (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasAVX]>;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr,
- Intrinsic V4F32Int,
- Intrinsic V2F64Int> {
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
def PSm_Int : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
TA, OpSize,
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
}
-multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr> {
+multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
+ RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
def PSm : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TA, OpSize, Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
def PDm : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
@@ -4261,8 +4622,8 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
}
-multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
+multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
// Intrinsic operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
@@ -4295,24 +4656,90 @@ multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
// FP round - roundss, roundps, roundsd, roundpd
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Intrinsic form
- defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
- VEX;
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd,
- 0>, VEX_4V;
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V;
+
// Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
- defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+ defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
+ VEX;
+ defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
}
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// ptest instruction we'll lower to this in X86ISelLowering primarily from
+// the intel intrinsic that corresponds to this.
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize, VEX;
+
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize;
+}
+
+// The bit test instructions below are AVX only
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+}
+
+//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
@@ -4431,79 +4858,104 @@ let Constraints = "$src1 = $dst" in
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
- def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
OpSize;
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
OpSize;
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
}
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem>;
}
- defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
- defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
- def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
-
- def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- }
-}
-
-defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, Intrinsic IntId> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+}
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvpd>;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_pd_256>;
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_ps_256>;
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@@ -4529,30 +4981,6 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-// ptest instruction we'll lower to this in X86ISelLowering primarily from
-// the intel intrinsic that corresponds to this.
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
-def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
- OpSize, VEX;
-def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
- OpSize, VEX;
-}
-
-let Defs = [EFLAGS] in {
-def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
- OpSize;
-def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
- OpSize;
-}
-
let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -4603,17 +5031,20 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
//===----------------------------------------------------------------------===//
// Packed Compare Implicit Length Strings, Return Mask
-let Defs = [EFLAGS], usesCustomInserter = 1 in {
- def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "#PCMPISTRM128rr PSEUDO!",
+multiclass pseudo_pcmpistrm<string asm> {
+ def REG : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
- imm:$src3))]>, OpSize;
- def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "#PCMPISTRM128rm PSEUDO!",
+ imm:$src3))]>;
+ def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
- VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize;
+ VR128:$src1, (load addr:$src2), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
@@ -4636,20 +5067,20 @@ let Defs = [XMM0, EFLAGS] in {
}
// Packed Compare Explicit Length Strings, Return Mask
-let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
- def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
-
- def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "#PCMPESTRM128rm PSEUDO!",
+multiclass pseudo_pcmpestrm<string asm> {
+ def REG : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
- OpSize;
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX],
@@ -4941,3 +5372,579 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;
+
+//===----------------------------------------------------------------------===//
+// CLMUL Instructions
+//===----------------------------------------------------------------------===//
+
+// Only the AVX version of CLMUL instructions are described here.
+
+// Carry-less Multiplication instructions
+let isAsmParserOnly = 1 in {
+def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+// Assembler Only
+multiclass avx_vpclmul<string asm> {
+ def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+
+ def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">;
+defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
+defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
+defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
+
+} // isAsmParserOnly
+
+//===----------------------------------------------------------------------===//
+// AVX Instructions
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in {
+
+// Load from memory and broadcast to all elements of the destination operand
+class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
+
+def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>;
+
+// Insert packed floating-point values
+def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Extract packed floating-point values
+def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+
+// Conditional SIMD Packed Loads and Stores
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256,
+ PatFrag pf128, PatFrag pf256> {
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
+ def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V;
+ def mr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256,
+ memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256,
+ memopv2f64, memopv4f64>;
+
+// Permute Floating-Point Values
+multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+ Intrinsic IntVar, Intrinsic IntImm> {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop_f:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
+}
+
+defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
+
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Zero All YMM registers
+def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
+
+// Zero Upper bits of YMM registers
+def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
+
+} // isAsmParserOnly
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+ VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+ VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+ VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+//===----------------------------------------------------------------------===//
+// SSE Shuffle pattern fragments
+//===----------------------------------------------------------------------===//
+
+// This is part of a "work in progress" refactoring. The idea is that all
+// vector shuffles are going to be translated into target specific nodes and
+// directly matched by the patterns below (which can be changed along the way)
+// The AVX version of some but not all of them are described here, and more
+// should come in a near future.
+
+// Shuffle with PSHUFD instruction folding loads. The first two patterns match
+// SSE2 loads, which are always promoted to v2i64. The last one should match
+// the SSE1 case, where the only legal load is v4f32, but there is no PSHUFD
+// in SSE2, how does it ever worked? Anyway, the pattern will remain here until
+// we investigate further.
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
+
+// Shuffle with PSHUFD instruction.
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+// Shuffle with SHUFPD instruction.
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with SHUFPS instruction.
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with MOVHLPS instruction
+def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVDDUP instruction
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+
+// Shuffle with UNPCKLPS
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPS
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKLPD
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPD
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLBW
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
+ (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLWD
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
+ (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLDQ
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
+ (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLQDQ
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHBW
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
+ (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHWD
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
+ (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHDQ
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHQDQ
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVLHPS
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+// Shuffle with MOVLHPD
+def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
+// is during lowering, where it's not possible to recognize the load fold cause
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSS
+def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// FIXME: Instead of a X86Movss there should be a X86Movlps here, the problem
+// is during lowering, where it's not possible to recognize the load fold cause
+// it has two uses through a bitcast. One use disappears at isel time and the
+// fold opportunity reappears.
+def : Pat<(X86Movss VR128:$src1,
+ (bc_v4i32 (v2i64 (load addr:$src2)))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSD
+def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVSHDUP
+def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSHDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
+ (MOVSHDUPrm addr:$src)>;
+
+// Shuffle with MOVSLDUP
+def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSLDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
+ (MOVSLDUPrm addr:$src)>;
+
+// Shuffle with PSHUFHW
+def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PSHUFLW
+def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PALIGN
+def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 2b8720bac343..36badb403e81 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -103,6 +103,9 @@ getNonexecutableStackSection(MCContext &Ctx) const {
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64)
+ GlobalPrefix = "";
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 23b0666f5f30..9564fe0b92d4 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -365,7 +365,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
const TargetInstrDesc &Desc,
raw_ostream &OS) const {
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
// VEX_R: opcode externsion equivalent to REX.R in
@@ -429,10 +429,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
- if (TSFlags & X86II::VEX_W)
+ if ((TSFlags >> 32) & X86II::VEX_W)
VEX_W = 1;
- if (TSFlags & X86II::VEX_L)
+ if ((TSFlags >> 32) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
@@ -469,33 +469,39 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned NumOps = MI.getNumOperands();
unsigned CurOp = 0;
+ bool IsDestMem = false;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMDestMem:
+ IsDestMem = true;
+ // The important info for the VEX prefix is never beyond the address
+ // registers. Don't check beyond that.
+ NumOps = CurOp = X86::AddrNumOperands;
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem:
- NumOps = CurOp = X86::AddrNumOperands;
case X86II::MRMSrcMem:
case X86II::MRMSrcReg:
if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
-
- // CurOp and NumOps are equal when VEX_R represents a register used
- // to index a memory destination (which is the last operand)
- CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+ CurOp++;
if (HasVEX_4V) {
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
CurOp++;
}
+ // To only check operands before the memory address ones, start
+ // the search from the begining
+ if (IsDestMem)
+ CurOp = 0;
+
// If the last register should be encoded in the immediate field
// do not use any bit from VEX prefix to this register, ignore it
- if (TSFlags & X86II::VEX_I8IMM)
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM)
NumOps--;
for (; CurOp != NumOps; ++CurOp) {
@@ -508,7 +514,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
}
break;
- default: // MRMDestReg, MRM0r-MRM7r
+ default: // MRMDestReg, MRM0r-MRM7r, RawFrm
+ if (!MI.getNumOperands())
+ break;
+
if (MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0;
@@ -524,7 +533,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_R = 0x0;
}
break;
- assert(0 && "Not implemented!");
}
// Emit segment override opcode prefix as needed.
@@ -793,9 +801,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX)
+ if ((TSFlags >> 32) & X86II::VEX)
HasVEXPrefix = true;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
// Determine where the memory operand starts, if present.
@@ -819,6 +827,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
+
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ break;
case X86II::AddRegFrm:
EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
@@ -833,10 +849,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestMem:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + X86::AddrNumOperands;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
EmitMemModRMByte(MI, CurOp,
- GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)),
+ GetX86RegNum(MI.getOperand(SrcRegNum)),
TSFlags, CurByte, OS, Fixups);
- CurOp += X86::AddrNumOperands + 1;
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMSrcReg:
@@ -934,7 +955,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
- if (TSFlags & X86II::VEX_I8IMM) {
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
bool IsExtReg =
X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index e67fc06a6cd7..8c4620f92177 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,7 +16,6 @@
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MCAsmInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -29,21 +28,19 @@
#include "llvm/Type.h"
using namespace llvm;
-
-const X86Subtarget &X86MCInstLower::getSubtarget() const {
- return AsmPrinter.getSubtarget();
-}
+X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+ X86AsmPrinter &asmprinter)
+: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+ MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
- assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
- return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}
MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
- const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering();
- return static_cast<const X86TargetLowering*>(TLI)->
- getPICBaseSymbol(AsmPrinter.MF, Ctx);
+ return static_cast<const X86TargetLowering*>(TM.getTargetLowering())->
+ getPICBaseSymbol(&MF, Ctx);
}
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
@@ -56,7 +53,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (!MO.isGlobal()) {
assert(MO.isSymbol());
- Name += AsmPrinter.MAI->getGlobalPrefix();
+ Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
} else {
const GlobalValue *GV = MO.getGlobal();
@@ -91,7 +88,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -105,7 +102,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -121,7 +118,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -178,7 +175,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
Ctx);
- if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) {
+ if (MO.isJTI() && MAI.hasSetDirective()) {
// If .set directive is supported, use it to reduce the number of
// relocations the assembler will generate for differences between
// local labels. This is only safe when the symbols are in the same
@@ -255,7 +252,13 @@ static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
}
/// \brief Simplify things like MOV32rm to MOV32o32a.
-static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
unsigned AddrBase = IsStore;
unsigned RegOp = IsStore ? 0 : 5;
@@ -336,7 +339,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(MO,
- AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
break;
}
@@ -377,12 +380,17 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
case X86::MMX_V_SETALLONES:
LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
- case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
- case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
+ case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+ case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
+ case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+ case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -393,12 +401,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
// register inputs modeled as normal uses instead of implicit uses. As such,
// truncate off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
case X86::CALL64r:
- case X86::CALL64pcrel32: {
+ case X86::CALL64pcrel32:
+ case X86::WINCALL64r:
+ case X86::WINCALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
@@ -456,15 +466,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// MOV64ao8, MOV64o8a
// XCHG16ar, XCHG32ar, XCHG64ar
case X86::MOV8mr_NOREX:
- case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
case X86::MOV8rm_NOREX:
- case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
- case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
- case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
- case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
- case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
- case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
- case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
@@ -505,46 +513,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
-void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
- raw_ostream &O) {
- // Only the target-dependent form of DBG_VALUE should get here.
- // Referencing the offset and metadata as NOps-2 and NOps-1 is
- // probably portable to other targets; frame pointer location is not.
- unsigned NOps = MI->getNumOperands();
- assert(NOps==7);
- O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
- if (V.getContext().isSubprogram())
- O << DISubprogram(V.getContext()).getDisplayName() << ":";
- O << V.getName();
- O << " <- ";
- // Frame address. Currently handles register +- offset only.
- O << '[';
- if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
- printOperand(MI, 0, O);
- else
- O << "undef";
- O << '+'; printOperand(MI, 3, O);
- O << ']';
- O << "+";
- printOperand(MI, NOps-2, O);
-}
-
-MachineLocation
-X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
-
- if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
- return Location;
-}
-
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- X86MCInstLower MCInstLowering(OutContext, Mang, *this);
+ X86MCInstLower MCInstLowering(Mang, *MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
@@ -555,6 +526,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64:
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 9e5474fc81b3..539b09be6fd7 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -13,27 +13,30 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
+ class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
+ class MachineFunction;
class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;
+ class TargetMachine;
class X86AsmPrinter;
- class X86Subtarget;
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
X86AsmPrinter &AsmPrinter;
-
- const X86Subtarget &getSubtarget() const;
public:
- X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter)
- : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {}
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5f31e00ebabd..fedd49ebb540 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -38,8 +38,15 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
: X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
@@ -193,6 +200,12 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::DR7:
return 7;
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -456,26 +469,29 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- bool requiresRealignment =
- RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
// FIXME: Currently we don't support stack realignment for functions with
// variable-sized allocas.
- // FIXME: Temporary disable the error - it seems to be too conservative.
+ // FIXME: It's more complicated than this...
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
-
- return (requiresRealignment && !MFI->hasVarSizedObjects());
+
+ // If we've requested that we force align the stack do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
+ return requiresRealignment && canRealignStack(MF);
}
-bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
-bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
- int &FrameIdx) const {
+bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
if (Reg == FramePtr && hasFP(MF)) {
FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
return true;
@@ -610,10 +626,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
+ int SPAdj, RegScavenger *RS) const{
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -660,7 +675,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
MI.getOperand(i+3).setOffset(Offset);
}
- return 0;
}
void
@@ -750,7 +764,7 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -901,6 +915,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
DebugLoc DL;
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
@@ -979,7 +1004,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark the place where EBP/RBP was saved.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
// Define the current CFA rule to use the provided offset.
if (StackSize) {
@@ -1007,7 +1032,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
// Define the current CFA to use the EBP/RBP register.
MachineLocation FPDst(FramePtr);
@@ -1047,7 +1072,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!HasFP && needsFrameMoves) {
// Mark callee-saved push instruction.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
// Define the current CFA rule to use the provided offset.
unsigned Ptr = StackSize ?
@@ -1062,7 +1087,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
DL = MBB.findDebugLoc(MBBI);
// Adjust stack pointer: ESP -= numbytes.
- if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe
+ // the stack and adjust the stack pointer in one go. The 64-bit version
+ // of __chkstk is only responsible for probing the stack. The 64-bit
+ // prologue is responsible for adjusting the stack pointer. Touching the
+ // stack at 4K increments is necessary to ensure that the guard pages used
+ // by the OS virtual memory manager are allocated in correct sequence.
+ if (NumBytes >= 4096 &&
+ (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
// Check, whether EAX is livein for this function.
bool isEAXAlive = false;
for (MachineRegisterInfo::livein_iterator
@@ -1073,16 +1108,16 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
Reg == X86::AH || Reg == X86::AL);
}
- // Function prologue calls _alloca to probe the stack when allocating more
- // than 4k bytes in one go. Touching the stack at 4K increments is necessary
- // to ensure that the guard pages used by the OS virtual memory manager are
- // allocated in correct sequence.
+
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
@@ -1093,8 +1128,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
@@ -1119,7 +1155,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if ((NumBytes || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -1172,6 +1208,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
+ // If we're forcing a stack realignment we can't rely on just the frame
+ // info, we need to know the ABI stack alignment as well in case we
+ // have a call out. Otherwise just make sure we have some alignment - we'll
+ // go with the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -1519,7 +1566,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
namespace {
struct MSAH : public MachineFunctionPass {
static char ID;
- MSAH() : MachineFunctionPass(&ID) {}
+ MSAH() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index d852bcd2011c..527df05c58fc 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -117,18 +117,17 @@ public:
bool needsStackRealignment(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
- bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 9f0382e3fae9..95269b15760e 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -241,6 +241,10 @@ let Namespace = "X86" in {
def CR6 : Register<"cr6">;
def CR7 : Register<"cr7">;
def CR8 : Register<"cr8">;
+
+ // Pseudo index registers
+ def EIZ : Register<"eiz">;
+ def RIZ : Register<"riz">;
}
@@ -804,7 +808,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}];
}
-def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
+def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
@@ -829,4 +833,15 @@ def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
let CopyCost = -1; // Don't allow copying of status registers.
+
+ // EFLAGS is not allocatable.
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CCRClass::iterator
+ CCRClass::allocation_order_end(const MachineFunction &MF) const {
+ return allocation_order_begin(MF);
+ }
+ }];
}
diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h
new file mode 100644
index 000000000000..df040520bc8f
--- /dev/null
+++ b/lib/Target/X86/X86ShuffleDecode.h
@@ -0,0 +1,155 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+enum {
+ SM_SentinelZero = ~0U
+};
+
+static inline
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Defaults the copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
+
+// <3,1> or <6,7,2,3>
+static void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+static void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
+static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+}
+
+static void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back(4+(Imm & 3));
+ Imm >>= 2;
+ }
+}
+
+static void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm & 3));
+ Imm >>= 2;
+ }
+ ShuffleMask.push_back(4);
+ ShuffleMask.push_back(5);
+ ShuffleMask.push_back(6);
+ ShuffleMask.push_back(7);
+}
+
+static void DecodePUNPCKLMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i);
+ ShuffleMask.push_back(i+NElts);
+ }
+}
+
+static void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2);
+ ShuffleMask.push_back(i+NElts+NElts/2);
+ }
+}
+
+static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts + NElts);
+ Imm /= NElts;
+ }
+}
+
+static void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2); // Reads from dest
+ ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ }
+}
+
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. NElts indicates the number of elements in the vector allowing it to
+/// handle different datatypes and vector widths.
+static void DecodeUNPCKLPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest
+ ShuffleMask.push_back(i+NElts); // Reads from src
+ }
+}
+
+#endif
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 4a10be518f03..0d02e5ee472b 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -73,7 +73,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDefaultVisibility() &&
(isDecl || GV->isWeakForLinker()))
return X86II::MO_GOTPCREL;
- } else {
+ } else if (!isTargetWin64()) {
assert(isTargetELF() && "Unknown rip-relative target");
// Extra load is needed for all externally visible.
@@ -260,9 +260,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasAVX = ((ECX >> 28) & 0x1);
- HasAES = IsIntel && ((ECX >> 25) & 0x1);
+ HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
+ HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+ HasAVX = ((ECX >> 28) & 0x1);
+ HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
@@ -291,6 +292,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasSSE4A(false)
, HasAVX(false)
, HasAES(false)
+ , HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 486dbc4e2e90..0ee91abe21f4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -74,6 +74,9 @@ protected:
/// HasAES - Target has AES instructions
bool HasAES;
+ /// HasCLMUL - Target has carry-less multiplication
+ bool HasCLMUL;
+
/// HasFMA3 - Target has 3-operand fused multiply-add
bool HasFMA3;
@@ -149,6 +152,7 @@ public:
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasAVX() const { return HasAVX; }
bool hasAES() const { return HasAES; }
+ bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
@@ -182,6 +186,10 @@ public:
return Is64Bit && (isTargetMingw() || isTargetWindows());
}
+ bool isTargetWin32() const {
+ return !Is64Bit && (isTargetMingw() || isTargetWindows());
+ }
+
std::string getDataLayout() const {
const char *p;
if (is64Bit())
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index df00d3ffcc79..ce8636eb72b5 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -46,8 +46,15 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
- default:
+ case Triple::Darwin:
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ case Triple::MinGW32:
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+ default:
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
}
}
@@ -105,15 +112,21 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
-
+
// If no relocation model was picked, default as appropriate for the target.
if (getRelocationModel() == Reloc::Default) {
- if (!Subtarget.isTargetDarwin())
- setRelocationModel(Reloc::Static);
- else if (Subtarget.is64Bit())
+ // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
+ // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
+ // use static relocation model by default.
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
+ } else if (Subtarget.isTargetWin64())
setRelocationModel(Reloc::PIC_);
else
- setRelocationModel(Reloc::DynamicNoPIC);
+ setRelocationModel(Reloc::Static);
}
assert(getRelocationModel() != Reloc::Default &&
@@ -136,29 +149,27 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.isTargetDarwin() &&
is64Bit)
setRelocationModel(Reloc::PIC_);
-
+
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
} else if (Subtarget.isTargetCygMing()) {
Subtarget.setPICStyle(PICStyles::None);
} else if (Subtarget.isTargetDarwin()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else if (getRelocationModel() == Reloc::PIC_)
+ if (getRelocationModel() == Reloc::PIC_)
Subtarget.setPICStyle(PICStyles::StubPIC);
else {
assert(getRelocationModel() == Reloc::DynamicNoPIC);
Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
}
} else if (Subtarget.isTargetELF()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else
- Subtarget.setPICStyle(PICStyles::GOT);
+ Subtarget.setPICStyle(PICStyles::GOT);
}
-
+
// Finally, if we have "none" as our PIC style, force to static mode.
if (Subtarget.getPICStyle() == PICStyles::None)
setRelocationModel(Reloc::Static);
@@ -182,9 +193,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
- PM.add(createX87FPRegKillInserterPass());
-
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index 6656bdc10eae..8f06dd32662f 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -264,15 +264,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
-
+
// Check for mov mnemonic
- unsigned src, dst, srcSR, dstSR;
- if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) {
- O << "\tmov " << getRegisterName(dst) << ", ";
- O << getRegisterName(src);
- } else {
+ if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ else
printInstruction(MI, O);
- }
OutStreamer.EmitRawText(O.str());
}
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 1b8e7edfc7ca..38b35d7666c0 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -10,7 +10,7 @@ tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(XCoreGenCallingConv.inc -gen-callingconv)
tablegen(XCoreGenSubtarget.inc -gen-subtarget)
-add_llvm_target(XCore
+add_llvm_target(XCoreCodeGen
XCoreFrameInfo.cpp
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 5564ddf133ea..755ece7e9aba 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -56,6 +56,17 @@ namespace {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
+
// Complex Pattern Selectors.
bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
@@ -151,17 +162,15 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
- if (Predicate_immMskBitp(N)) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
// Transformation function: get the size of a mask
- int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue();
- assert(isMask_32(MaskVal));
// Look for the first non-zero bit
- SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal));
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
MVT::i32, MskSize);
}
- else if (! Predicate_immU16(N)) {
- unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ else if (!isUInt<16>(Val)) {
SDValue CPIdx =
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index dd90ea976770..ad00046af17d 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -46,33 +46,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSR, unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- // We look for 4 kinds of patterns here:
- // add dst, src, 0
- // sub dst, src, 0
- // or dst, src, src
- // and dst, src, src
- if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus)
- && isZeroImm(MI.getOperand(2))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r)
- && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
@@ -437,7 +410,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
it->getFrameIdx(), RC, &RI);
if (emitFrameMoves) {
MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
+ BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
}
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e5b0171579fc..d2b116eef0d8 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -30,12 +30,6 @@ public:
///
virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 19b9b1f8c00c..6b3b39ba1d49 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -140,17 +140,7 @@ def immU20 : PatLeaf<(imm), [{
return (uint32_t)N->getZExtValue() < (1 << 20);
}]>;
-def immMskBitp : PatLeaf<(imm), [{
- uint32_t value = (uint32_t)N->getZExtValue();
- if (!isMask_32(value)) {
- return false;
- }
- int msksize = 32 - CountLeadingZeros_32(value);
- return (msksize >= 1 && msksize <= 8)
- || msksize == 16
- || msksize == 24
- || msksize == 32;
-}]>;
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
def immBitp : PatLeaf<(imm), [{
uint32_t value = (uint32_t)N->getZExtValue();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 2a88342180e4..f82e59814e77 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -155,10 +155,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
@@ -291,7 +290,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Erase old instruction.
MBB.erase(II);
- return 0;
}
void
@@ -420,7 +418,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Show update of SP.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
@@ -439,7 +437,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveLRLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
MachineLocation CSSrc(XCore::LR);
MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
@@ -455,7 +453,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
MBB.addLiveIn(XCore::R10);
if (emitFrameMoves) {
MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveR10Label);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
MachineLocation CSSrc(XCore::R10);
MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
@@ -467,7 +465,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
// Show FP is now valid.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
MachineLocation SPDst(FramePtr);
MachineLocation SPSrc(MachineLocation::VirtualFP);
MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 66132ba8ff66..e636c1c7298a 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -54,9 +54,8 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index abfa514e20cf..838d5505490f 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -25,7 +25,7 @@ namespace {
// Hello - The first implementation, without getAnalysisUsage.
struct Hello : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- Hello() : FunctionPass(&ID) {}
+ Hello() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
++HelloCounter;
@@ -37,13 +37,13 @@ namespace {
}
char Hello::ID = 0;
-static RegisterPass<Hello> X("hello", "Hello World Pass");
+INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false);
namespace {
// Hello2 - The second implementation with getAnalysisUsage implemented.
struct Hello2 : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- Hello2() : FunctionPass(&ID) {}
+ Hello2() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
++HelloCounter;
@@ -60,5 +60,6 @@ namespace {
}
char Hello2::ID = 0;
-static RegisterPass<Hello2>
-Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
+INITIALIZE_PASS(Hello2, "hello2",
+ "Hello World Pass (with getAnalysisUsage implemented)",
+ false, false);
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 28ea079abd36..0c77e1fd8cff 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -67,7 +67,7 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(&ID), maxElements(maxElements) {}
+ : CallGraphSCCPass(ID), maxElements(maxElements) {}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +84,8 @@ namespace {
}
char ArgPromotion::ID = 0;
-static RegisterPass<ArgPromotion>
-X("argpromotion", "Promote 'by reference' arguments to scalars");
+INITIALIZE_PASS(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false);
Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
@@ -208,8 +208,8 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
// have direct callees.
for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
UI != E; ++UI) {
- CallSite CS = CallSite::get(*UI);
- assert(CS.getInstruction() && "Should only have direct calls!");
+ CallSite CS(*UI);
+ assert(CS && "Should only have direct calls!");
if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
return false;
@@ -619,14 +619,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Get a new callgraph node for NF.
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
-
// Loop over all of the callers of the function, transforming the call sites
// to pass in the loaded pointers.
//
SmallVector<Value*, 16> Args;
while (!F->use_empty()) {
- CallSite CS = CallSite::get(F->use_back());
+ CallSite CS(F->use_back());
assert(CS.getCalledFunction() == F);
Instruction *Call = CS.getInstruction();
const AttrListPtr &CallPAL = CS.getAttributes();
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 3c05f88027a6..64e8d792dc3a 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,10 +19,12 @@
#define DEBUG_TYPE "constmerge"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -31,7 +33,7 @@ STATISTIC(NumMerged, "Number of global constants merged");
namespace {
struct ConstantMerge : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- ConstantMerge() : ModulePass(&ID) {}
+ ConstantMerge() : ModulePass(ID) {}
// run - For this pass, process all of the globals in the module,
// eliminating duplicate constants.
@@ -41,12 +43,32 @@ namespace {
}
char ConstantMerge::ID = 0;
-static RegisterPass<ConstantMerge>
-X("constmerge", "Merge Duplicate Global Constants");
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+ "Merge Duplicate Global Constants", false, false);
ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
bool ConstantMerge::runOnModule(Module &M) {
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
// Map unique constant/section pairs to globals. We don't want to merge
// globals in different sections.
DenseMap<Constant*, GlobalVariable*> CMap;
@@ -79,9 +101,13 @@ bool ConstantMerge::runOnModule(Module &M) {
// Only process constants with initializers in the default addres space.
if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty())
+ GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
continue;
+
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 475eee8b19e4..47df235424e2 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -122,11 +122,11 @@ namespace {
protected:
// DAH uses this to specify a different ID.
- explicit DAE(void *ID) : ModulePass(ID) {}
+ explicit DAE(char &ID) : ModulePass(ID) {}
public:
static char ID; // Pass identification, replacement for typeid
- DAE() : ModulePass(&ID) {}
+ DAE() : ModulePass(ID) {}
bool runOnModule(Module &M);
@@ -151,8 +151,7 @@ namespace {
char DAE::ID = 0;
-static RegisterPass<DAE>
-X("deadargelim", "Dead Argument Elimination");
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
namespace {
/// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -160,15 +159,16 @@ namespace {
/// by bugpoint.
struct DAH : public DAE {
static char ID;
- DAH() : DAE(&ID) {}
+ DAH() : DAE(ID) {}
virtual bool ShouldHackArguments() const { return true; }
};
}
char DAH::ID = 0;
-static RegisterPass<DAH>
-Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)");
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+ false, false);
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
@@ -220,11 +220,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
//
std::vector<Value*> Args;
while (!Fn.use_empty()) {
- CallSite CS = CallSite::get(Fn.use_back());
+ CallSite CS(Fn.use_back());
Instruction *Call = CS.getInstruction();
// Pass all the same arguments.
- Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs);
+ Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
// Drop any attributes that were on the vararg arguments.
AttrListPtr PAL = CS.getAttributes();
@@ -250,8 +250,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
- if (MDNode *N = Call->getDbgMetadata())
- New->setDbgMetadata(N);
+ New->setDebugLoc(Call->getDebugLoc());
Args.clear();
@@ -725,7 +724,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
//
std::vector<Value*> Args;
while (!F->use_empty()) {
- CallSite CS = CallSite::get(F->use_back());
+ CallSite CS(F->use_back());
Instruction *Call = CS.getInstruction();
AttributesVec.clear();
@@ -780,8 +779,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
- if (MDNode *N = Call->getDbgMetadata())
- New->setDbgMetadata(N);
+ New->setDebugLoc(Call->getDebugLoc());
Args.clear();
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
index 662fbb5cd413..5dc50c5bef32 100644
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -26,7 +26,7 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
namespace {
struct DTE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- DTE() : ModulePass(&ID) {}
+ DTE() : ModulePass(ID) {}
// doPassInitialization - For this pass, it removes global symbol table
// entries for primitive types. These are never used for linking in GCC and
@@ -45,7 +45,7 @@ namespace {
}
char DTE::ID = 0;
-static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
+INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
ModulePass *llvm::createDeadTypeEliminationPass() {
return new DTE();
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 7f67e48ade83..45c5fe76ba7c 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -17,15 +17,15 @@
#include "llvm/Pass.h"
#include "llvm/Constants.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
#include <algorithm>
using namespace llvm;
namespace {
/// @brief A pass to extract specific functions and their dependencies.
class GVExtractorPass : public ModulePass {
- std::vector<GlobalValue*> Named;
+ SetVector<GlobalValue *> Named;
bool deleteStuff;
- bool reLink;
public:
static char ID; // Pass identification, replacement for typeid
@@ -33,135 +33,42 @@ namespace {
/// specified function. Otherwise, it deletes as much of the module as
/// possible, except for the function specified.
///
- explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true,
- bool relinkCallees = false)
- : ModulePass(&ID), Named(GVs), deleteStuff(deleteS),
- reLink(relinkCallees) {}
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
bool runOnModule(Module &M) {
- if (Named.size() == 0) {
- return false; // Nothing to extract
- }
-
-
- if (deleteStuff)
- return deleteGV();
- M.setModuleInlineAsm("");
- return isolateGV(M);
- }
-
- bool deleteGV() {
- for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
- GE = Named.end(); GI != GE; ++GI) {
- if (Function* NamedFunc = dyn_cast<Function>(*GI)) {
- // If we're in relinking mode, set linkage of all internal callees to
- // external. This will allow us extract function, and then - link
- // everything together
- if (reLink) {
- for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end();
- B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), E = B->end();
- I != E; ++I) {
- if (CallInst* callInst = dyn_cast<CallInst>(&*I)) {
- Function* Callee = callInst->getCalledFunction();
- if (Callee && Callee->hasLocalLinkage())
- Callee->setLinkage(GlobalValue::ExternalLinkage);
- }
- }
- }
- }
-
- NamedFunc->setLinkage(GlobalValue::ExternalLinkage);
- NamedFunc->deleteBody();
- assert(NamedFunc->isDeclaration() && "This didn't make the function external!");
- } else {
- if (!(*GI)->isDeclaration()) {
- cast<GlobalVariable>(*GI)->setInitializer(0); //clear the initializer
- (*GI)->setLinkage(GlobalValue::ExternalLinkage);
- }
- }
- }
- return true;
- }
-
- bool isolateGV(Module &M) {
- // Mark all globals internal
- // FIXME: what should we do with private linkage?
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the NamedSet, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
if (!I->isDeclaration()) {
- I->setLinkage(GlobalValue::InternalLinkage);
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == Named.count(I))
+ I->setInitializer(0);
}
+
+ // Visit the Functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isDeclaration()) {
- I->setLinkage(GlobalValue::InternalLinkage);
- }
-
- // Make sure our result is globally accessible...
- // by putting them in the used array
- {
- std::vector<Constant *> AUGs;
- const Type *SBP=
- Type::getInt8PtrTy(M.getContext());
- for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
- GE = Named.end(); GI != GE; ++GI) {
- (*GI)->setLinkage(GlobalValue::ExternalLinkage);
- AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP));
- }
- ArrayType *AT = ArrayType::get(SBP, AUGs.size());
- Constant *Init = ConstantArray::get(AT, AUGs);
- GlobalValue *gv = new GlobalVariable(M, AT, false,
- GlobalValue::AppendingLinkage,
- Init, "llvm.used");
- gv->setSection("llvm.metadata");
- }
-
- // All of the functions may be used by global variables or the named
- // globals. Loop through them and create a new, external functions that
- // can be "used", instead of ones with bodies.
- std::vector<Function*> NewFunctions;
-
- Function *Last = --M.end(); // Figure out where the last real fn is.
-
- for (Module::iterator I = M.begin(); ; ++I) {
- if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
- Function *New = Function::Create(I->getFunctionType(),
- GlobalValue::ExternalLinkage);
- New->copyAttributesFrom(I);
-
- // If it's not the named function, delete the body of the function
- I->dropAllReferences();
-
- M.getFunctionList().push_back(New);
- NewFunctions.push_back(New);
- New->takeName(I);
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == Named.count(I))
+ I->deleteBody();
}
- if (&*I == Last) break; // Stop after processing the last function
- }
-
- // Now that we have replacements all set up, loop through the module,
- // deleting the old functions, replacing them with the newly created
- // functions.
- if (!NewFunctions.empty()) {
- unsigned FuncNum = 0;
- Module::iterator I = M.begin();
- do {
- if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
- // Make everything that uses the old function use the new dummy fn
- I->replaceAllUsesWith(NewFunctions[FuncNum++]);
-
- Function *Old = I;
- ++I; // Move the iterator to the new function
-
- // Delete the old function!
- M.getFunctionList().erase(Old);
-
- } else {
- ++I; // Skip the function we are extracting
- }
- } while (&*I != NewFunctions[0]);
- }
-
return true;
}
};
@@ -170,6 +77,6 @@ namespace {
}
ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
- bool deleteFn, bool relinkCallees) {
- return new GVExtractorPass(GVs, deleteFn, relinkCallees);
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 9bd7af61c531..6165ba023f73 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -41,7 +41,7 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(&ID) {}
+ FunctionAttrs() : CallGraphSCCPass(ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -69,8 +69,8 @@ namespace {
}
char FunctionAttrs::ID = 0;
-static RegisterPass<FunctionAttrs>
-X("functionattrs", "Deduce function attributes");
+INITIALIZE_PASS(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false);
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
@@ -162,14 +162,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
- CallSite CS = CallSite::get(I);
- if (CS.getInstruction() && CS.getCalledFunction()) {
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()) {
// Ignore calls to functions in the same SCC.
if (SCCNodes.count(CS.getCalledFunction()))
continue;
// Ignore intrinsics that only access local memory.
if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
- if (AliasAnalysis::getModRefBehavior(id) ==
+ if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
AliasAnalysis::AccessesArguments) {
// Check that all pointer arguments point to local memory.
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 44216a6df99c..aa18601b9aec 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -31,7 +31,7 @@ STATISTIC(NumVariables, "Number of global variables removed");
namespace {
struct GlobalDCE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- GlobalDCE() : ModulePass(&ID) {}
+ GlobalDCE() : ModulePass(ID) {}
// run - Do the GlobalDCE pass on the specified module, optionally updating
// the specified callgraph to reflect the changes.
@@ -51,7 +51,8 @@ namespace {
}
char GlobalDCE::ID = 0;
-static RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination");
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+ "Dead Global Elimination", false, false);
ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 735a1c47c39b..a77af549caa1 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -59,7 +59,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
static char ID; // Pass identification, replacement for typeid
- GlobalOpt() : ModulePass(&ID) {}
+ GlobalOpt() : ModulePass(ID) {}
bool runOnModule(Module &M);
@@ -74,7 +74,8 @@ namespace {
}
char GlobalOpt::ID = 0;
-static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
+INITIALIZE_PASS(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false);
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -1467,7 +1468,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
TargetData *TD) {
if (!TD)
return false;
-
+
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -2077,7 +2078,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
// The first index must be zero.
- ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
+ ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
if (!CI || !CI->isZero()) return false;
// The remaining indices must be compile-time known integers within the
@@ -2302,7 +2303,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (isa<InlineAsm>(CI->getCalledValue())) return false;
// Resolve function pointers.
- Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue()));
+ Function *Callee = dyn_cast<Function>(getVal(Values,
+ CI->getCalledValue()));
if (!Callee) return false; // Cannot resolve.
SmallVector<Constant*, 8> Formals;
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index e4db235b1d10..1b3cf7810cc6 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -35,7 +35,7 @@ namespace {
///
struct IPCP : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- IPCP() : ModulePass(&ID) {}
+ IPCP() : ModulePass(ID) {}
bool runOnModule(Module &M);
private:
@@ -45,8 +45,8 @@ namespace {
}
char IPCP::ID = 0;
-static RegisterPass<IPCP>
-X("ipconstprop", "Interprocedural constant propagation");
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false);
ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
@@ -94,7 +94,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
return false;
- CallSite CS = CallSite::get(cast<Instruction>(U));
+ CallSite CS(cast<Instruction>(U));
if (!CS.isCallee(UI))
return false;
@@ -219,7 +219,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
// constant.
bool MadeChange = false;
for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
- CallSite CS = CallSite::get(*UI);
+ CallSite CS(*UI);
Instruction* Call = CS.getInstruction();
// Not a call instruction or a call instruction that's not calling F
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 8e312e7d9185..ecc60ad06932 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -36,7 +36,7 @@ namespace {
InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(&ID, -2000000000) {}
+ AlwaysInliner() : Inliner(ID, -2000000000) {}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +61,8 @@ namespace {
}
char AlwaysInliner::ID = 0;
-static RegisterPass<AlwaysInliner>
-X("always-inline", "Inliner for always_inline functions");
+INITIALIZE_PASS(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false);
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 74b4a1c10ece..9c6637dfe5ad 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -33,8 +33,8 @@ namespace {
SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
- SimpleInliner() : Inliner(&ID) {}
- SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {}
+ SimpleInliner() : Inliner(ID) {}
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +56,8 @@ namespace {
}
char SimpleInliner::ID = 0;
-static RegisterPass<SimpleInliner>
-X("inline", "Function Integration/Inlining");
+INITIALIZE_PASS(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false);
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9bb01f5699fe..4983e8e13a3e 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -48,10 +48,10 @@ HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
-Inliner::Inliner(void *ID)
+Inliner::Inliner(char &ID)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
-Inliner::Inliner(void *ID, int Threshold)
+Inliner::Inliner(char &ID, int Threshold)
: CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
/// getAnalysisUsage - For this class, we declare that we require and preserve
@@ -238,11 +238,11 @@ bool Inliner::shouldInline(CallSite CS) {
bool someOuterCallWouldNotBeInlined = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
I != E; ++I) {
- CallSite CS2 = CallSite::get(*I);
+ CallSite CS2(*I);
// If this isn't a call to Caller (it could be some other sort
// of reference) skip it.
- if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+ if (!CS2 || CS2.getCalledFunction() != Caller)
continue;
InlineCost IC2 = getInlineCost(CS2);
@@ -334,10 +334,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- CallSite CS = CallSite::get(I);
+ CallSite CS(cast<Value>(I));
// If this isn't a call, or it is a call to an intrinsic, it can
// never be inlined.
- if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+ if (!CS || isa<IntrinsicInst>(I))
continue;
// If this is a direct call to an external function, we can never inline
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 47abb7dfd812..a1d919fd8a04 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -63,11 +63,11 @@ namespace {
} // end anonymous namespace
char InternalizePass::ID = 0;
-static RegisterPass<InternalizePass>
-X("internalize", "Internalize Global Symbols");
+INITIALIZE_PASS(InternalizePass, "internalize",
+ "Internalize Global Symbols", false, false);
InternalizePass::InternalizePass(bool AllButMain)
- : ModulePass(&ID), AllButMain(AllButMain){
+ : ModulePass(ID), AllButMain(AllButMain){
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
if (!APIList.empty()) // If a list is specified, use it as well.
@@ -75,7 +75,7 @@ InternalizePass::InternalizePass(bool AllButMain)
}
InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
- : ModulePass(&ID), AllButMain(false){
+ : ModulePass(ID), AllButMain(false){
for(std::vector<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index cb813303facb..f88dff67d7c9 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -37,7 +37,7 @@ namespace {
unsigned NumLoops;
explicit LoopExtractor(unsigned numLoops = ~0)
- : LoopPass(&ID), NumLoops(numLoops) {}
+ : LoopPass(ID), NumLoops(numLoops) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -50,8 +50,8 @@ namespace {
}
char LoopExtractor::ID = 0;
-static RegisterPass<LoopExtractor>
-X("loop-extract", "Extract loops into new functions");
+INITIALIZE_PASS(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false);
namespace {
/// SingleLoopExtractor - For bugpoint.
@@ -62,8 +62,8 @@ namespace {
} // End anonymous namespace
char SingleLoopExtractor::ID = 0;
-static RegisterPass<SingleLoopExtractor>
-Y("loop-extract-single", "Extract at most one loop into a new function");
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+ "Extract at most one loop into a new function", false, false);
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
@@ -147,27 +147,26 @@ namespace {
std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
public:
static char ID; // Pass identification, replacement for typeid
- explicit BlockExtractorPass(const std::vector<BasicBlock*> &B)
- : ModulePass(&ID), BlocksToNotExtract(B) {
+ BlockExtractorPass() : ModulePass(ID) {
if (!BlockFile.empty())
LoadFile(BlockFile.c_str());
}
- BlockExtractorPass() : ModulePass(&ID) {}
bool runOnModule(Module &M);
};
}
char BlockExtractorPass::ID = 0;
-static RegisterPass<BlockExtractorPass>
-XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)");
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+ "Extract Basic Blocks From Module (for bugpoint use)",
+ false, false);
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
//
-ModulePass *llvm::createBlockExtractorPass(const std::vector<BasicBlock*> &BTNE)
+ModulePass *llvm::createBlockExtractorPass()
{
- return new BlockExtractorPass(BTNE);
+ return new BlockExtractorPass();
}
void BlockExtractorPass::LoadFile(const char *Filename) {
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 76cfef8335c9..6c715de04b76 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -109,7 +109,7 @@ namespace {
bool IsTransformableFunction(StringRef Name);
public:
static char ID; // Pass identification, replacement for typeid
- LowerSetJmp() : ModulePass(&ID) {}
+ LowerSetJmp() : ModulePass(ID) {}
void visitCallInst(CallInst& CI);
void visitInvokeInst(InvokeInst& II);
@@ -122,7 +122,7 @@ namespace {
} // end anonymous namespace
char LowerSetJmp::ID = 0;
-static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump");
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
// run - Run the transformation on the program. We grab the function
// prototypes for longjmp and setjmp. If they are used in the program,
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index aeeafe7fd19d..5d838f98aa08 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -29,44 +29,27 @@
//
// Many functions have their address taken by the virtual function table for
// the object they belong to. However, as long as it's only used for a lookup
-// and call, this is irrelevant, and we'd like to fold such implementations.
+// and call, this is irrelevant, and we'd like to fold such functions.
//
-// * use SCC to cut down on pair-wise comparisons and solve larger cycles.
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
//
-// The current implementation loops over a pair-wise comparison of all
-// functions in the program where the two functions in the pair are treated as
-// assumed to be equal until proven otherwise. We could both use fewer
-// comparisons and optimize more complex cases if we used strongly connected
-// components of the call graph.
-//
-// * be smarter about bitcast.
+// * be smarter about bitcasts.
//
// In order to fold functions, we will sometimes add either bitcast instructions
// or bitcast constant expressions. Unfortunately, this can confound further
// analysis since the two functions differ where one has a bitcast and the
-// other doesn't. We should learn to peer through bitcasts without imposing bad
-// performance properties.
-//
-// * don't emit aliases for Mach-O.
-//
-// Mach-O doesn't support aliases which means that we must avoid introducing
-// them in the bitcode on architectures which don't support them, such as
-// Mac OSX. There's a few approaches to this problem;
-// a) teach codegen to lower global aliases to thunks on platforms which don't
-// support them.
-// b) always emit thunks, and create a separate thunk-to-alias pass which
-// runs on ELF systems. This has the added benefit of transforming other
-// thunks such as those produced by a C++ frontend into aliases when legal
-// to do so.
+// other doesn't. We should learn to look through bitcasts.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
@@ -76,68 +59,103 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
-#include <map>
#include <vector>
using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
namespace {
+ /// MergeFunctions finds functions which will generate identical machine code,
+ /// by considering all pointer types to be equivalent. Once identified,
+ /// MergeFunctions will fold them by replacing a call to one to a call to a
+ /// bitcast of the other.
+ ///
class MergeFunctions : public ModulePass {
public:
- static char ID; // Pass identification, replacement for typeid
- MergeFunctions() : ModulePass(&ID) {}
+ static char ID;
+ MergeFunctions() : ModulePass(ID) {}
bool runOnModule(Module &M);
private:
- bool isEquivalentGEP(const GetElementPtrInst *GEP1,
- const GetElementPtrInst *GEP2);
-
- bool equals(const BasicBlock *BB1, const BasicBlock *BB2);
- bool equals(const Function *F, const Function *G);
+ /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
+ /// may be deleted, or may be converted into a thunk. In either case, it
+ /// should never be visited again.
+ void MergeTwoFunctions(Function *F, Function *G) const;
- bool compare(const Value *V1, const Value *V2);
+ /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
+ /// replace direct uses of G with bitcast(F).
+ void WriteThunk(Function *F, Function *G) const;
- const Function *LHS, *RHS;
- typedef DenseMap<const Value *, unsigned long> IDMap;
- IDMap Map;
- DenseMap<const Function *, IDMap> Domains;
- DenseMap<const Function *, unsigned long> DomainCount;
TargetData *TD;
};
}
char MergeFunctions::ID = 0;
-static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions");
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
ModulePass *llvm::createMergeFunctionsPass() {
return new MergeFunctions();
}
-// ===----------------------------------------------------------------------===
-// Comparison of functions
-// ===----------------------------------------------------------------------===
+namespace {
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const TargetData *TD, const Function *F1,
+ const Function *F2)
+ : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+
+ /// Compare - test whether the two functions have equivalent behaviour.
+ bool Compare();
+
+private:
+ /// Compare - test whether two basic blocks have equivalent behaviour.
+ bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+ /// Enumerate - Assign or look up previously assigned numbers for the two
+ /// values, and return whether the numbers are equal. Numbers are assigned in
+ /// the order visited.
+ bool Enumerate(const Value *V1, const Value *V2);
+
+ /// isEquivalentOperation - Compare two Instructions for equivalence, similar
+ /// to Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ bool isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const;
+
+ /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+ }
-static unsigned long hash(const Function *F) {
- const FunctionType *FTy = F->getFunctionType();
+ /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+ bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
- FoldingSetNodeID ID;
- ID.AddInteger(F->size());
- ID.AddInteger(F->getCallingConv());
- ID.AddBoolean(F->hasGC());
- ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(FTy->getReturnType()->getTypeID());
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(FTy->getParamType(i)->getTypeID());
- return ID.ComputeHash();
+ // The two functions undergoing comparison.
+ const Function *F1, *F2;
+
+ const TargetData *TD;
+
+ typedef DenseMap<const Value *, unsigned long> IDMap;
+ IDMap Map1, Map2;
+ unsigned long IDMap1Count, IDMap2Count;
+};
}
-/// isEquivalentType - any two pointers are equivalent. Otherwise, standard
-/// type equivalence rules apply.
-static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
+/// isEquivalentType - any two pointers in the same address space are
+/// equivalent. Otherwise, standard type equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+ const Type *Ty2) const {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID())
@@ -184,21 +202,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
return true;
}
- case Type::UnionTyID: {
- const UnionType *UTy1 = cast<UnionType>(Ty1);
- const UnionType *UTy2 = cast<UnionType>(Ty2);
-
- // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc.
- if (UTy1->getNumElements() != UTy2->getNumElements())
- return false;
-
- for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) {
- if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i)))
- return false;
- }
- return true;
- }
-
case Type::FunctionTyID: {
const FunctionType *FTy1 = cast<FunctionType>(Ty1);
const FunctionType *FTy2 = cast<FunctionType>(Ty2);
@@ -216,11 +219,18 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
return true;
}
- case Type::ArrayTyID:
+ case Type::ArrayTyID: {
+ const ArrayType *ATy1 = cast<ArrayType>(Ty1);
+ const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ return ATy1->getNumElements() == ATy2->getNumElements() &&
+ isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+ }
+
case Type::VectorTyID: {
- const SequentialType *STy1 = cast<SequentialType>(Ty1);
- const SequentialType *STy2 = cast<SequentialType>(Ty2);
- return isEquivalentType(STy1->getElementType(), STy2->getElementType());
+ const VectorType *VTy1 = cast<VectorType>(Ty1);
+ const VectorType *VTy2 = cast<VectorType>(Ty2);
+ return VTy1->getNumElements() == VTy2->getNumElements() &&
+ isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
}
}
}
@@ -228,8 +238,8 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
/// isEquivalentOperation - determine whether the two operations are the same
/// except that pointer-to-A and pointer-to-B are equivalent. This should be
/// kept in sync with Instruction::isSameOperationAs.
-static bool
-isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const {
if (I1->getOpcode() != I2->getOpcode() ||
I1->getNumOperands() != I2->getNumOperands() ||
!isEquivalentType(I1->getType(), I2->getType()) ||
@@ -281,18 +291,15 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
return true;
}
-bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
- const GetElementPtrInst *GEP2) {
+/// isEquivalentGEP - determine whether two GEP operations perform the same
+/// underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+ const GEPOperator *GEP2) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
- SmallVector<Value *, 8> Indices1, Indices2;
- for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(),
- E = GEP1->idx_end(); I != E; ++I) {
- Indices1.push_back(*I);
- }
- for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(),
- E = GEP2->idx_end(); I != E; ++I) {
- Indices2.push_back(*I);
- }
+ SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+ SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
Indices1.data(), Indices1.size());
uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
@@ -300,7 +307,6 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
return Offset1 == Offset2;
}
- // Equivalent types aren't enough.
if (GEP1->getPointerOperand()->getType() !=
GEP2->getPointerOperand()->getType())
return false;
@@ -309,19 +315,26 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
return false;
for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
- if (!compare(GEP1->getOperand(i), GEP2->getOperand(i)))
+ if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
return false;
}
return true;
}
-bool MergeFunctions::compare(const Value *V1, const Value *V2) {
- if (V1 == LHS || V1 == RHS)
- if (V2 == LHS || V2 == RHS)
- return true;
+/// Enumerate - Compare two values used by the two functions under pair-wise
+/// comparison. If this is the first time the values are seen, they're added to
+/// the mapping so that we will detect mismatches on next use.
+bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+ // Check for function @f1 referring to itself and function @f2 referring to
+ // itself, or referring to each other, or both referring to either of them.
+ // They're all equivalent if the two functions are otherwise equivalent.
+ if (V1 == F1 && V2 == F2)
+ return true;
+ if (V1 == F2 && V2 == F1)
+ return true;
- // TODO: constant expressions in terms of LHS and RHS
+ // TODO: constant expressions with GEP or references to F1 or F2.
if (isa<Constant>(V1))
return V1 == V2;
@@ -332,228 +345,138 @@ bool MergeFunctions::compare(const Value *V1, const Value *V2) {
IA1->getConstraintString() == IA2->getConstraintString();
}
- // We enumerate constants globally and arguments, basic blocks or
- // instructions within the function they belong to.
- const Function *Domain1 = NULL;
- if (const Argument *A = dyn_cast<Argument>(V1)) {
- Domain1 = A->getParent();
- } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) {
- Domain1 = BB->getParent();
- } else if (const Instruction *I = dyn_cast<Instruction>(V1)) {
- Domain1 = I->getParent()->getParent();
- }
-
- const Function *Domain2 = NULL;
- if (const Argument *A = dyn_cast<Argument>(V2)) {
- Domain2 = A->getParent();
- } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) {
- Domain2 = BB->getParent();
- } else if (const Instruction *I = dyn_cast<Instruction>(V2)) {
- Domain2 = I->getParent()->getParent();
- }
-
- if (Domain1 != Domain2)
- if (Domain1 != LHS && Domain1 != RHS)
- if (Domain2 != LHS && Domain2 != RHS)
- return false;
-
- IDMap &Map1 = Domains[Domain1];
unsigned long &ID1 = Map1[V1];
if (!ID1)
- ID1 = ++DomainCount[Domain1];
+ ID1 = ++IDMap1Count;
- IDMap &Map2 = Domains[Domain2];
unsigned long &ID2 = Map2[V2];
if (!ID2)
- ID2 = ++DomainCount[Domain2];
+ ID2 = ++IDMap2Count;
return ID1 == ID2;
}
-bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) {
- BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
- BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+/// Compare - test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+ BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
do {
- if (!compare(FI, GI))
+ if (!Enumerate(F1I, F2I))
return false;
- if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) {
- const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI);
- const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI);
+ if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+ const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+ if (!GEP2)
+ return false;
- if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
return false;
if (!isEquivalentGEP(GEP1, GEP2))
return false;
} else {
- if (!isEquivalentOperation(FI, GI))
+ if (!isEquivalentOperation(F1I, F2I))
return false;
- for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
- Value *OpF = FI->getOperand(i);
- Value *OpG = GI->getOperand(i);
+ assert(F1I->getNumOperands() == F2I->getNumOperands());
+ for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+ Value *OpF1 = F1I->getOperand(i);
+ Value *OpF2 = F2I->getOperand(i);
- if (!compare(OpF, OpG))
+ if (!Enumerate(OpF1, OpF2))
return false;
- if (OpF->getValueID() != OpG->getValueID() ||
- !isEquivalentType(OpF->getType(), OpG->getType()))
+ if (OpF1->getValueID() != OpF2->getValueID() ||
+ !isEquivalentType(OpF1->getType(), OpF2->getType()))
return false;
}
}
- ++FI, ++GI;
- } while (FI != FE && GI != GE);
+ ++F1I, ++F2I;
+ } while (F1I != F1E && F2I != F2E);
- return FI == FE && GI == GE;
+ return F1I == F1E && F2I == F2E;
}
-bool MergeFunctions::equals(const Function *F, const Function *G) {
+/// Compare - test whether the two functions have equivalent behaviour.
+bool FunctionComparator::Compare() {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
- if (F->getAttributes() != G->getAttributes())
+ if (F1->getAttributes() != F2->getAttributes())
return false;
- if (F->hasGC() != G->hasGC())
+ if (F1->hasGC() != F2->hasGC())
return false;
- if (F->hasGC() && F->getGC() != G->getGC())
+ if (F1->hasGC() && F1->getGC() != F2->getGC())
return false;
- if (F->hasSection() != G->hasSection())
+ if (F1->hasSection() != F2->hasSection())
return false;
- if (F->hasSection() && F->getSection() != G->getSection())
+ if (F1->hasSection() && F1->getSection() != F2->getSection())
return false;
- if (F->isVarArg() != G->isVarArg())
+ if (F1->isVarArg() != F2->isVarArg())
return false;
// TODO: if it's internal and only used in direct calls, we could handle this
// case too.
- if (F->getCallingConv() != G->getCallingConv())
+ if (F1->getCallingConv() != F2->getCallingConv())
return false;
- if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
+ if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
return false;
- assert(F->arg_size() == G->arg_size() &&
+ assert(F1->arg_size() == F2->arg_size() &&
"Identical functions have a different number of args.");
- LHS = F;
- RHS = G;
-
// Visit the arguments so that they get enumerated in the order they're
// passed in.
- for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
- fe = F->arg_end(); fi != fe; ++fi, ++gi) {
- if (!compare(fi, gi))
+ for (Function::const_arg_iterator f1i = F1->arg_begin(),
+ f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+ if (!Enumerate(f1i, f2i))
llvm_unreachable("Arguments repeat");
}
- SmallVector<const BasicBlock *, 8> FBBs, GBBs;
- SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F.
- FBBs.push_back(&F->getEntryBlock());
- GBBs.push_back(&G->getEntryBlock());
- VisitedBBs.insert(FBBs[0]);
- while (!FBBs.empty()) {
- const BasicBlock *FBB = FBBs.pop_back_val();
- const BasicBlock *GBB = GBBs.pop_back_val();
- if (!compare(FBB, GBB) || !equals(FBB, GBB)) {
- Domains.clear();
- DomainCount.clear();
- return false;
- }
- const TerminatorInst *FTI = FBB->getTerminator();
- const TerminatorInst *GTI = GBB->getTerminator();
- assert(FTI->getNumSuccessors() == GTI->getNumSuccessors());
- for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(FTI->getSuccessor(i)))
- continue;
- FBBs.push_back(FTI->getSuccessor(i));
- GBBs.push_back(GTI->getSuccessor(i));
- }
- }
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
- Domains.clear();
- DomainCount.clear();
- return true;
-}
+ F1BBs.push_back(&F1->getEntryBlock());
+ F2BBs.push_back(&F2->getEntryBlock());
-// ===----------------------------------------------------------------------===
-// Folding of functions
-// ===----------------------------------------------------------------------===
-
-// Cases:
-// * F is external strong, G is external strong:
-// turn G into a thunk to F (1)
-// * F is external strong, G is external weak:
-// turn G into a thunk to F (1)
-// * F is external weak, G is external weak:
-// unfoldable
-// * F is external strong, G is internal:
-// address of G taken:
-// turn G into a thunk to F (1)
-// address of G not taken:
-// make G an alias to F (2)
-// * F is internal, G is external weak
-// address of F is taken:
-// turn G into a thunk to F (1)
-// address of F is not taken:
-// make G an alias of F (2)
-// * F is internal, G is internal:
-// address of F and G are taken:
-// turn G into a thunk to F (1)
-// address of G is not taken:
-// make G an alias to F (2)
-//
-// alias requires linkage == (external,local,weak) fallback to creating a thunk
-// external means 'externally visible' linkage != (internal,private)
-// internal means linkage == (internal,private)
-// weak means linkage mayBeOverridable
-// being external implies that the address is taken
-//
-// 1. turn G into a thunk to F
-// 2. make G an alias to F
+ VisitedBBs.insert(F1BBs[0]);
+ while (!F1BBs.empty()) {
+ const BasicBlock *F1BB = F1BBs.pop_back_val();
+ const BasicBlock *F2BB = F2BBs.pop_back_val();
-enum LinkageCategory {
- ExternalStrong,
- ExternalWeak,
- Internal
-};
+ if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+ return false;
-static LinkageCategory categorize(const Function *F) {
- switch (F->getLinkage()) {
- case GlobalValue::InternalLinkage:
- case GlobalValue::PrivateLinkage:
- case GlobalValue::LinkerPrivateLinkage:
- return Internal;
-
- case GlobalValue::WeakAnyLinkage:
- case GlobalValue::WeakODRLinkage:
- case GlobalValue::ExternalWeakLinkage:
- case GlobalValue::LinkerPrivateWeakLinkage:
- return ExternalWeak;
-
- case GlobalValue::ExternalLinkage:
- case GlobalValue::AvailableExternallyLinkage:
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::LinkOnceODRLinkage:
- case GlobalValue::AppendingLinkage:
- case GlobalValue::DLLImportLinkage:
- case GlobalValue::DLLExportLinkage:
- case GlobalValue::CommonLinkage:
- return ExternalStrong;
- }
+ const TerminatorInst *F1TI = F1BB->getTerminator();
+ const TerminatorInst *F2TI = F2BB->getTerminator();
- llvm_unreachable("Unknown LinkageType.");
- return ExternalWeak;
+ assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+ for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ continue;
+
+ F1BBs.push_back(F1TI->getSuccessor(i));
+ F2BBs.push_back(F2TI->getSuccessor(i));
+ }
+ }
+ return true;
}
-static void ThunkGToF(Function *F, Function *G) {
+/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
+/// direct uses of G with bitcast(F).
+void MergeFunctions::WriteThunk(Function *F, Function *G) const {
if (!G->mayBeOverridden()) {
// Redirect direct callers of G to F.
Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
@@ -567,33 +490,34 @@ static void ThunkGToF(Function *F, Function *G) {
}
}
+ // If G was internal then we may have replaced all uses if G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
G->getParent());
BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
SmallVector<Value *, 16> Args;
unsigned i = 0;
const FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
- if (FFTy->getParamType(i) == AI->getType()) {
- Args.push_back(AI);
- } else {
- Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB));
- }
+ Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
++i;
}
- CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
+ CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
if (NewG->getReturnType()->isVoidTy()) {
- ReturnInst::Create(F->getContext(), BB);
- } else if (CI->getType() != NewG->getReturnType()) {
- Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
- ReturnInst::Create(F->getContext(), BCI, BB);
+ Builder.CreateRetVoid();
} else {
- ReturnInst::Create(F->getContext(), CI, BB);
+ Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
}
NewG->copyAttributesFrom(G);
@@ -602,152 +526,126 @@ static void ThunkGToF(Function *F, Function *G) {
G->eraseFromParent();
}
-static void AliasGToF(Function *F, Function *G) {
- // Darwin will trigger llvm_unreachable if asked to codegen an alias.
- return ThunkGToF(F, G);
-
-#if 0
- if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
- return ThunkGToF(F, G);
-
- GlobalAlias *GA = new GlobalAlias(
- G->getType(), G->getLinkage(), "",
- ConstantExpr::getBitCast(F, G->getType()), G->getParent());
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
- GA->takeName(G);
- GA->setVisibility(G->getVisibility());
- G->replaceAllUsesWith(GA);
- G->eraseFromParent();
-#endif
-}
-
-static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
- Function *F = FnVec[i];
- Function *G = FnVec[j];
-
- LinkageCategory catF = categorize(F);
- LinkageCategory catG = categorize(G);
-
- if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
- std::swap(FnVec[i], FnVec[j]);
- std::swap(F, G);
- std::swap(catF, catG);
- }
-
- switch (catF) {
- case ExternalStrong:
- switch (catG) {
- case ExternalStrong:
- case ExternalWeak:
- ThunkGToF(F, G);
- break;
- case Internal:
- if (G->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- }
- break;
-
- case ExternalWeak: {
- assert(catG == ExternalWeak);
+/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
+/// Function G is deleted.
+void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
+ if (F->isWeakForLinker()) {
+ assert(G->isWeakForLinker());
// Make them both thunks to the same internal function.
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
F->getParent());
H->copyAttributesFrom(F);
H->takeName(F);
F->replaceAllUsesWith(H);
- ThunkGToF(F, G);
- ThunkGToF(F, H);
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
- F->setLinkage(GlobalValue::InternalLinkage);
- } break;
-
- case Internal:
- switch (catG) {
- case ExternalStrong:
- llvm_unreachable(0);
- // fall-through
- case ExternalWeak:
- if (F->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- case Internal: {
- bool addrTakenF = F->hasAddressTaken();
- bool addrTakenG = G->hasAddressTaken();
- if (!addrTakenF && addrTakenG) {
- std::swap(FnVec[i], FnVec[j]);
- std::swap(F, G);
- std::swap(addrTakenF, addrTakenG);
- }
+ WriteThunk(F, G);
+ WriteThunk(F, H);
- if (addrTakenF && addrTakenG) {
- ThunkGToF(F, G);
- } else {
- assert(!addrTakenG);
- AliasGToF(F, G);
- }
- } break;
- } break;
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ } else {
+ WriteThunk(F, G);
}
++NumFunctionsMerged;
- return true;
}
-// ===----------------------------------------------------------------------===
-// Pass definition
-// ===----------------------------------------------------------------------===
+static unsigned ProfileFunction(const Function *F) {
+ const FunctionType *FTy = F->getFunctionType();
-bool MergeFunctions::runOnModule(Module &M) {
- bool Changed = false;
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
+}
- std::map<unsigned long, std::vector<Function *> > FnMap;
+class ComparableFunction {
+public:
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration())
- continue;
+ AssertingVH<Function> const Func;
+ const unsigned Hash;
+ TargetData * const TD;
+};
- FnMap[hash(F)].push_back(F);
+struct MergeFunctionsEqualityInfo {
+ static ComparableFunction *getEmptyKey() {
+ return reinterpret_cast<ComparableFunction*>(0);
+ }
+ static ComparableFunction *getTombstoneKey() {
+ return reinterpret_cast<ComparableFunction*>(-1);
}
+ static unsigned getHashValue(const ComparableFunction *CF) {
+ return CF->Hash;
+ }
+ static bool isEqual(const ComparableFunction *LHS,
+ const ComparableFunction *RHS) {
+ if (LHS == RHS)
+ return true;
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ assert(LHS->TD == RHS->TD && "Comparing functions for different targets");
+ return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare();
+ }
+};
+bool MergeFunctions::runOnModule(Module &M) {
+ typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType;
+
+ bool Changed = false;
TD = getAnalysisIfAvailable<TargetData>();
+ std::vector<Function *> Funcs;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
+ Funcs.push_back(F);
+ }
+
bool LocalChanged;
do {
LocalChanged = false;
- DEBUG(dbgs() << "size: " << FnMap.size() << "\n");
- for (std::map<unsigned long, std::vector<Function *> >::iterator
- I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
- std::vector<Function *> &FnVec = I->second;
- DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
-
- for (int i = 0, e = FnVec.size(); i != e; ++i) {
- for (int j = i + 1; j != e; ++j) {
- bool isEqual = equals(FnVec[i], FnVec[j]);
-
- DEBUG(dbgs() << " " << FnVec[i]->getName()
- << (isEqual ? " == " : " != ")
- << FnVec[j]->getName() << "\n");
-
- if (isEqual) {
- if (fold(FnVec, i, j)) {
- LocalChanged = true;
- FnVec.erase(FnVec.begin() + j);
- --j, --e;
- }
- }
- }
- }
+ FnSetType FnSet;
+ for (unsigned i = 0, e = Funcs.size(); i != e;) {
+ Function *F = Funcs[i];
+ ComparableFunction *NewF = new ComparableFunction(F, TD);
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (!Result.second) {
+ ComparableFunction *&OldF = *Result.first;
+ assert(OldF && "Expected a hash collision");
+
+ // NewF will be deleted in favour of OldF unless NewF is strong and
+ // OldF is weak in which case swap them to keep the strong definition.
+
+ if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker())
+ std::swap(OldF, NewF);
+
+ DEBUG(dbgs() << " " << OldF->Func->getName() << " == "
+ << NewF->Func->getName() << '\n');
+
+ Funcs.erase(Funcs.begin() + i);
+ --e;
+
+ Function *DeleteF = NewF->Func;
+ delete NewF;
+ MergeTwoFunctions(OldF->Func, DeleteF);
+ LocalChanged = true;
+ Changed = true;
+ } else {
+ ++i;
+ }
}
- Changed |= LocalChanged;
+ DeleteContainerPointers(FnSet);
} while (LocalChanged);
return Changed;
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 6b9814ceb876..432f7c53a67d 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,7 @@ namespace {
struct PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
static char ID; // Pass identification, replacement for typeid
- PartialInliner() : ModulePass(&ID) {}
+ PartialInliner() : ModulePass(ID) {}
bool runOnModule(Module& M);
@@ -40,7 +40,8 @@ namespace {
}
char PartialInliner::ID = 0;
-static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner");
+INITIALIZE_PASS(PartialInliner, "partial-inliner",
+ "Partial Inliner", false, false);
ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
@@ -67,7 +68,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// Clone the function, so that we can hack away on it.
ValueMap<const Value*, Value*> VMap;
- Function* duplicateFunction = CloneFunction(F, VMap);
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(duplicateFunction);
BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
@@ -159,7 +161,7 @@ bool PartialInliner::runOnModule(Module& M) {
bool recursive = false;
for (Function::use_iterator UI = currFunc->use_begin(),
UE = currFunc->use_end(); UI != UE; ++UI)
- if (Instruction* I = dyn_cast<Instruction>(UI))
+ if (Instruction* I = dyn_cast<Instruction>(*UI))
if (I->getParent()->getParent() == currFunc) {
recursive = true;
break;
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
index 58e14481b0ed..4a99a411ab33 100644
--- a/lib/Transforms/IPO/PartialSpecialization.cpp
+++ b/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -50,14 +50,14 @@ namespace {
int scanDistribution(Function&, int, std::map<Constant*, int>&);
public :
static char ID; // Pass identification, replacement for typeid
- PartSpec() : ModulePass(&ID) {}
+ PartSpec() : ModulePass(ID) {}
bool runOnModule(Module &M);
};
}
char PartSpec::ID = 0;
-static RegisterPass<PartSpec>
-X("partialspecialization", "Partial Specialization");
+INITIALIZE_PASS(PartSpec, "partialspecialization",
+ "Partial Specialization", false, false);
// Specialize F by replacing the arguments (keys) in replacements with the
// constants (values). Replace all calls to F with those constants with
@@ -74,7 +74,8 @@ SpecializeFunction(Function* F,
deleted[arg->getArgNo()] = arg;
}
- Function* NF = CloneFunction(F, replacements);
+ Function* NF = CloneFunction(F, replacements,
+ /*ModuleLevelChanges=*/false);
NF->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(NF);
@@ -82,10 +83,10 @@ SpecializeFunction(Function* F,
ii != ee; ) {
Value::use_iterator i = ii;
++ii;
- if (isa<CallInst>(i) || isa<InvokeInst>(i)) {
- CallSite CS(cast<Instruction>(i));
+ User *U = *i;
+ CallSite CS(U);
+ if (CS) {
if (CS.getCalledFunction() == F) {
-
SmallVector<Value*, 6> args;
// Assemble the non-specialized arguments for the updated callsite.
// In the process, make sure that the specialized arguments are
@@ -105,13 +106,13 @@ SpecializeFunction(Function* F,
}
}
Value* NCall;
- if (CallInst *CI = dyn_cast<CallInst>(i)) {
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
NCall = CallInst::Create(NF, args.begin(), args.end(),
CI->getName(), CI);
cast<CallInst>(NCall)->setTailCall(CI->isTailCall());
cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv());
} else {
- InvokeInst *II = cast<InvokeInst>(i);
+ InvokeInst *II = cast<InvokeInst>(U);
NCall = InvokeInst::Create(NF, II->getNormalDest(),
II->getUnwindDest(),
args.begin(), args.end(),
@@ -123,8 +124,7 @@ SpecializeFunction(Function* F,
++numReplaced;
}
}
- next_use:
- ;
+ next_use:;
}
return NF;
}
@@ -174,14 +174,14 @@ void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
ui != ue; ++ui) {
bool interesting = false;
-
- if (isa<CmpInst>(ui)) interesting = true;
- else if (isa<CallInst>(ui))
+ User *U = *ui;
+ if (isa<CmpInst>(U)) interesting = true;
+ else if (isa<CallInst>(U))
interesting = ui->getOperand(0) == ii;
- else if (isa<InvokeInst>(ui))
+ else if (isa<InvokeInst>(U))
interesting = ui->getOperand(0) == ii;
- else if (isa<SwitchInst>(ui)) interesting = true;
- else if (isa<BranchInst>(ui)) interesting = true;
+ else if (isa<SwitchInst>(U)) interesting = true;
+ else if (isa<BranchInst>(U)) interesting = true;
if (interesting) {
args.push_back(std::distance(F.arg_begin(), ii));
@@ -196,14 +196,16 @@ int PartSpec::scanDistribution(Function& F, int arg,
std::map<Constant*, int>& dist) {
bool hasIndirect = false;
int total = 0;
- for(Value::use_iterator ii = F.use_begin(), ee = F.use_end();
- ii != ee; ++ii)
- if ((isa<CallInst>(ii) || isa<InvokeInst>(ii))
- && ii->getOperand(0) == &F) {
- ++dist[dyn_cast<Constant>(ii->getOperand(arg + 1))];
+ for (Value::use_iterator ii = F.use_begin(), ee = F.use_end();
+ ii != ee; ++ii) {
+ User *U = *ii;
+ CallSite CS(U);
+ if (CS && CS.getCalledFunction() == &F) {
+ ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
++total;
} else
hasIndirect = true;
+ }
// Preserve the original address taken function even if all other uses
// will be specialized.
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index de6099cc1daa..09ac76f97964 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -37,7 +37,7 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized");
namespace {
struct PruneEH : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- PruneEH() : CallGraphSCCPass(&ID) {}
+ PruneEH() : CallGraphSCCPass(ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -48,8 +48,8 @@ namespace {
}
char PruneEH::ID = 0;
-static RegisterPass<PruneEH>
-X("prune-eh", "Remove unused exception handling info");
+INITIALIZE_PASS(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false);
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 4566a7634af5..ee10ad0b8ba2 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -29,15 +29,15 @@ namespace {
class StripDeadPrototypesPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(&ID) { }
+ StripDeadPrototypesPass() : ModulePass(ID) { }
virtual bool runOnModule(Module &M);
};
} // end anonymous namespace
char StripDeadPrototypesPass::ID = 0;
-static RegisterPass<StripDeadPrototypesPass>
-X("strip-dead-prototypes", "Strip Unused Function Prototypes");
+INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false);
bool StripDeadPrototypesPass::runOnModule(Module &M) {
bool MadeChange = false;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 12e8db8b4a54..20b7b8f2b850 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -39,7 +39,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripSymbols(bool ODI = false)
- : ModulePass(&ID), OnlyDebugInfo(ODI) {}
+ : ModulePass(ID), OnlyDebugInfo(ODI) {}
virtual bool runOnModule(Module &M);
@@ -52,7 +52,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripNonDebugSymbols()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -65,7 +65,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDebugDeclare()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -78,7 +78,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDeadDebugInfo()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -89,32 +89,33 @@ namespace {
}
char StripSymbols::ID = 0;
-static RegisterPass<StripSymbols>
-X("strip", "Strip all symbols from a module");
+INITIALIZE_PASS(StripSymbols, "strip",
+ "Strip all symbols from a module", false, false);
ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
return new StripSymbols(OnlyDebugInfo);
}
char StripNonDebugSymbols::ID = 0;
-static RegisterPass<StripNonDebugSymbols>
-Y("strip-nondebug", "Strip all symbols, except dbg symbols, from a module");
+INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
+ "Strip all symbols, except dbg symbols, from a module",
+ false, false);
ModulePass *llvm::createStripNonDebugSymbolsPass() {
return new StripNonDebugSymbols();
}
char StripDebugDeclare::ID = 0;
-static RegisterPass<StripDebugDeclare>
-Z("strip-debug-declare", "Strip all llvm.dbg.declare intrinsics");
+INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
+ "Strip all llvm.dbg.declare intrinsics", false, false);
ModulePass *llvm::createStripDebugDeclarePass() {
return new StripDebugDeclare();
}
char StripDeadDebugInfo::ID = 0;
-static RegisterPass<StripDeadDebugInfo>
-A("strip-dead-debug-info", "Strip debug info for unused symbols");
+INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
+ "Strip debug info for unused symbols", false, false);
ModulePass *llvm::createStripDeadDebugInfoPass() {
return new StripDeadDebugInfo();
@@ -254,14 +255,15 @@ static bool StripDebugInfo(Module &M) {
}
}
- unsigned MDDbgKind = M.getMDKindID("dbg");
for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
++FI)
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
++BI) {
- Changed = true; // FIXME: Only set if there was debug metadata.
- BI->setMetadata(MDDbgKind, 0);
+ if (!BI->getDebugLoc().isUnknown()) {
+ Changed = true;
+ BI->setDebugLoc(DebugLoc());
+ }
}
return Changed;
@@ -348,8 +350,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
E = MDs.end(); I != E; ++I) {
- if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(),
- true)) {
+ GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+ if (GV && M.getGlobalVariable(GV->getName(), true)) {
if (!NMD)
NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
NMD->addOperand(*I);
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index a74686f408b6..b82b03f7d9e7 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -1,4 +1,4 @@
-//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===//
+//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -50,20 +50,19 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
- SRETPromotion() : CallGraphSCCPass(&ID) {}
+ SRETPromotion() : CallGraphSCCPass(ID) {}
private:
CallGraphNode *PromoteReturn(CallGraphNode *CGN);
bool isSafeToUpdateAllCallers(Function *F);
Function *cloneFunctionBody(Function *F, const StructType *STy);
CallGraphNode *updateCallSites(Function *F, Function *NF);
- bool nestedStructType(const StructType *STy);
};
}
char SRETPromotion::ID = 0;
-static RegisterPass<SRETPromotion>
-X("sretpromotion", "Promote sret arguments to multiple ret values");
+INITIALIZE_PASS(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false);
Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
@@ -156,7 +155,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
FnUseI != FnUseE; ++FnUseI) {
// The function is passed in as an argument to (possibly) another function,
// we can't change it!
- CallSite CS = CallSite::get(*FnUseI);
+ CallSite CS(*FnUseI);
Instruction *Call = CS.getInstruction();
// The function is used by something else than a call or invoke instruction,
// we can't change it!
@@ -187,7 +186,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
return false;
for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end();
GEPI != GEPE; ++GEPI)
- if (!isa<LoadInst>(GEPI))
+ if (!isa<LoadInst>(*GEPI))
return false;
}
// Any other FirstArg users make this function unsuitable for sret
@@ -271,7 +270,7 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
while (!F->use_empty()) {
- CallSite CS = CallSite::get(*F->use_begin());
+ CallSite CS(*F->use_begin());
Instruction *Call = CS.getInstruction();
const AttrListPtr &PAL = F->getAttributes();
@@ -351,14 +350,3 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
return NF_CGN;
}
-/// nestedStructType - Return true if STy includes any
-/// other aggregate types
-bool SRETPromotion::nestedStructType(const StructType *STy) {
- unsigned Num = STy->getNumElements();
- for (unsigned i = 0; i < Num; i++) {
- const Type *Ty = STy->getElementType(i);
- if (!Ty->isSingleValueType() && !Ty->isVoidTy())
- return true;
- }
- return false;
-}
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 24e052881a9d..6f9609cf997b 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -81,7 +81,7 @@ public:
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {}
public:
virtual bool runOnFunction(Function &F);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5876f408343b..19a05bfe9bba 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -474,19 +474,16 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
- // (icmp eq (A & (C1|C2)), (C1|C2))
+ // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Instruction *I1 = dyn_cast<Instruction>(Val);
- Instruction *I2 = dyn_cast<Instruction>(Val2);
- if (I1 && I1->getOpcode() == Instruction::And &&
- I2 && I2->getOpcode() == Instruction::And &&
- I1->getOperand(0) == I1->getOperand(0)) {
- ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1));
- ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1));
- if (CI1 && !CI1->isZero() && CI2 && !CI2->isZero() &&
- CI1->getValue().operator&(CI2->getValue()) == 0) {
+ Value *Op1 = 0, *Op2 = 0;
+ ConstantInt *CI1 = 0, *CI2 = 0;
+ if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
+ match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
+ if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
+ CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
- Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr);
+ Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
}
}
@@ -1170,11 +1167,28 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
if (LHSCst == 0 || RHSCst == 0) return 0;
- // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- if (LHSCst == RHSCst && LHSCC == RHSCC &&
- LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) -->
+ // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero POT
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *Op1 = 0, *Op2 = 0;
+ ConstantInt *CI1 = 0, *CI2 = 0;
+ if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
+ match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
+ if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
+ CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
+ Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
+ Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr);
+ }
+ }
+ }
}
// From here on, we only handle:
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 85251a83d4ea..0ebe3b45589e 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -96,14 +96,23 @@ static unsigned EnforceKnownAlignment(Value *V,
/// increase the alignment of the ultimate object, making this check succeed.
unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign) {
- unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
- sizeof(PrefAlign) * CHAR_BIT;
+ assert(V->getType()->isPointerTy() &&
+ "GetOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with rediculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
if (PrefAlign > Align)
Align = EnforceKnownAlignment(V, Align, PrefAlign);
@@ -529,7 +538,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
- UndefValue::get(II->getCalledValue()->getType()),
+ UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
@@ -630,8 +639,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
APInt DemandedElts(VWidth, 1);
APInt UndefElts(VWidth, 0);
- if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ DemandedElts, UndefElts)) {
II->setArgOperand(0, V);
return II;
}
@@ -655,8 +664,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
- Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType());
- Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType());
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
// Only extract each element once.
@@ -772,13 +783,15 @@ protected:
NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
}
bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
if (SizeCI->isAllOnesValue())
return true;
if (isString)
return SizeCI->getZExtValue() >=
- GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset));
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)))
+ GetStringLength(CI->getArgOperand(SizeArgOp));
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
return SizeCI->getZExtValue() >= Arg->getZExtValue();
}
return false;
@@ -1140,7 +1153,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
IntrinsicInst *Tramp =
cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
- Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
+ Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 505a0bf8f4e7..79a9b09c64d0 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -396,6 +396,11 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
case Instruction::Trunc:
// trunc(trunc(x)) -> trunc(x)
return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
@@ -454,6 +459,29 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ Src->hasOneUse()) {
+ // We have three types to worry about here, the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
return 0;
}
@@ -538,8 +566,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
- else
- return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
}
}
}
@@ -1097,6 +1124,38 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
break;
}
}
+
+ // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
+ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+ if (Call && Call->getCalledFunction() &&
+ Call->getCalledFunction()->getName() == "sqrt" &&
+ Call->getNumArgOperands() == 1) {
+ CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+ if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+ CI.getType()->isFloatTy() &&
+ Call->getType()->isDoubleTy() &&
+ Arg->getType()->isDoubleTy() &&
+ Arg->getOperand(0)->getType()->isFloatTy()) {
+ Function *Callee = Call->getCalledFunction();
+ Module *M = CI.getParent()->getParent()->getParent();
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Callee->getAttributes(),
+ Builder->getFloatTy(),
+ Builder->getFloatTy(),
+ NULL);
+ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+ "sqrtfcall");
+ ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
+ return ret;
+ }
+ }
+
return 0;
}
@@ -1308,6 +1367,199 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
return new ShuffleVectorInst(InVal, V2, Mask);
}
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ const Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win, try inserting into the
+ // right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+ // If we succeeded, we know that all of the element are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
+ }
+
+ // bitcast(trunc(lshr(bitcast(somevector), cst))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
@@ -1359,6 +1611,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
((Instruction*)NULL));
}
}
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
@@ -1368,16 +1625,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
- // If this is a cast from an integer to vector, check to see if the input
- // is a trunc or zext of a bitcast from vector. If so, we can replace all
- // the casts with a shuffle and (potentially) a bitcast.
- if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
- CastInst *SrcCast = cast<CastInst>(Src);
- if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
- if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
cast<VectorType>(DestTy), *this))
- return I;
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6c00586412ac..d7e2b72b7fac 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1374,7 +1374,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
case Instruction::Or:
// If bits are being or'd in that are not present in the constant we
// are comparing against, then the comparison could never succeed!
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
Constant *NotCI = ConstantExpr::getNot(RHS);
if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
return ReplaceInstUsesWith(ICI,
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 8933a0b137ab..b68fbc2db5c9 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -146,10 +146,14 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (TD) {
unsigned KnownAlign =
GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
- if (KnownAlign >
- (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
- LI.getAlignment()))
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+ TD->getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
}
// load (cast X) --> cast (load X) iff safe.
@@ -369,7 +373,7 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U))
return DI;
if (isa<BitCastInst>(U) && U->hasOneUse()) {
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin()))
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin()))
return DI;
}
}
@@ -411,10 +415,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (TD) {
unsigned KnownAlign =
GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
- if (KnownAlign >
- (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
- SI.getAlignment()))
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+ TD->getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
}
// Do really simple DSE, to catch cases where there are several consecutive
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f9ffdb10f266..c44fe9db6e3a 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -699,34 +699,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
SI.setOperand(2, TrueVal);
return &SI;
}
-
- // select (A == 0 | B == 0), T, F--> select (A != 0 & B != 0), F, T
- // Note: This is a canonicalization rather than an optimization, and is used
- // to expose opportunities to other instcombine transforms.
- Instruction* CondInst = dyn_cast<Instruction>(CondVal);
- if (CondInst && CondInst->hasOneUse() &&
- CondInst->getOpcode() == Instruction::Or) {
- ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0));
- ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1));
- if (LHSCmp && LHSCmp->hasOneUse() &&
- LHSCmp->getPredicate() == ICmpInst::ICMP_EQ &&
- RHSCmp && RHSCmp->hasOneUse() &&
- RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) {
- ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1));
- ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1));
- if (C1 && C1->isZero() && C2 && C2->isZero()) {
- LHSCmp->setPredicate(ICmpInst::ICMP_NE);
- RHSCmp->setPredicate(ICmpInst::ICMP_NE);
- Value *And =
- InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp,
- "and."+CondVal->getName()), SI);
- SI.setOperand(0, And);
- SI.setOperand(1, FalseVal);
- SI.setOperand(2, TrueVal);
- return &SI;
- }
- }
- }
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index e5ce8a612f3f..27716b886a22 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -56,10 +56,270 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return 0;
}
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64-bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI;
+ if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+ case Instruction::Shl: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) return true;
+
+ // We can always turn shl(c)+shr(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned HighBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(TypeWidth, HighBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::LShr: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) return true;
+
+ // We can always turn lshr(c)+shl(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, LowBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this value inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ return I;
+
+ case Instruction::Shl: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+ case Instruction::LShr: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+ // If this is oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
+ // zeros.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+
+ case Instruction::Select:
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+ NumBits, isLeftShift, IC));
+ return PN;
+ }
+ }
+}
+
+
+
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
+
+
+ // See if we can propagate this shift into the input, this covers the trivial
+ // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return ReplaceInstUsesWith(I,
+ GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ }
+
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
@@ -288,39 +548,17 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
ConstantInt::get(Ty, AmtSum));
}
- if (ShiftOp->getOpcode() == Instruction::LShr &&
- I.getOpcode() == Instruction::AShr) {
- if (AmtSum >= TypeBits)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
- return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
- }
-
- if (ShiftOp->getOpcode() == Instruction::AShr &&
- I.getOpcode() == Instruction::LShr) {
- // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
- if (AmtSum >= TypeBits)
- AmtSum = TypeBits-1;
-
- Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(I.getContext(), Mask));
- }
-
- // Okay, if we get here, one shift must be left, and the other shift must be
- // right. See if the amounts are equal.
if (ShiftAmt1 == ShiftAmt2) {
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
- if (I.getOpcode() == Instruction::Shl) {
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(),Mask));
}
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
- if (I.getOpcode() == Instruction::LShr) {
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(), Mask));
@@ -329,7 +567,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
// (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
@@ -340,7 +579,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
@@ -355,9 +595,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
// (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
- assert(ShiftOp->getOpcode() == Instruction::LShr ||
- ShiftOp->getOpcode() == Instruction::AShr);
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
ConstantInt::get(Ty, ShiftDiff));
@@ -367,8 +606,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
- assert(ShiftOp->getOpcode() == Instruction::Shl);
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index af2958fe3d91..e46c67994e2b 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -60,8 +60,8 @@ STATISTIC(NumSunkInst , "Number of instructions sunk");
char InstCombiner::ID = 0;
-static RegisterPass<InstCombiner>
-X("instcombine", "Combine redundant instructions");
+INITIALIZE_PASS(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false);
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LCSSAID);
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index 9ae378670715..a77d70cd1c1b 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -34,7 +34,7 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(&ID) {}
+ EdgeProfiler() : ModulePass(ID) {}
virtual const char *getPassName() const {
return "Edge Profiler";
@@ -43,8 +43,8 @@ namespace {
}
char EdgeProfiler::ID = 0;
-static RegisterPass<EdgeProfiler>
-X("insert-edge-profiling", "Insert instrumentation for edge profiling");
+INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
+ "Insert instrumentation for edge profiling", false, false);
ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 41e3a39f2685..8eec9872812d 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -36,7 +36,7 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(&ID) {}
+ OptimalEdgeProfiler() : ModulePass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(ProfileEstimatorPassID);
@@ -50,9 +50,9 @@ namespace {
}
char OptimalEdgeProfiler::ID = 0;
-static RegisterPass<OptimalEdgeProfiler>
-X("insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling");
+INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false);
ModulePass *llvm::createOptimalEdgeProfilerPass() {
return new OptimalEdgeProfiler();
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
deleted file mode 100644
index dcf14a6860da..000000000000
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ /dev/null
@@ -1,1112 +0,0 @@
-//===------- ABCD.cpp - Removes redundant conditional branches ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass removes redundant branch instructions. This algorithm was
-// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper
-// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original
-// Algorithm was created to remove array bound checks for strongly typed
-// languages. This implementation expands the idea and removes any conditional
-// branches that can be proved redundant, not only those used in array bound
-// checks. With the SSI representation, each variable has a
-// constraint. By analyzing these constraints we can prove that a branch is
-// redundant. When a branch is proved redundant it means that
-// one direction will always be taken; thus, we can change this branch into an
-// unconditional jump.
-// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination
-// after ABCD to clean up the code.
-// This implementation was created based on the implementation of the ABCD
-// algorithm implemented for the compiler Jitrino.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "abcd"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-
-using namespace llvm;
-
-STATISTIC(NumBranchTested, "Number of conditional branches analyzed");
-STATISTIC(NumBranchRemoved, "Number of conditional branches removed");
-
-namespace {
-
-class ABCD : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
- ABCD() : FunctionPass(&ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SSI>();
- }
-
- bool runOnFunction(Function &F);
-
- private:
- /// Keep track of whether we've modified the program yet.
- bool modified;
-
- enum ProveResult {
- False = 0,
- Reduced = 1,
- True = 2
- };
-
- typedef ProveResult (*meet_function)(ProveResult, ProveResult);
- static ProveResult max(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::max(res1, res2);
- }
- static ProveResult min(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::min(res1, res2);
- }
-
- class Bound {
- public:
- Bound(APInt v, bool upper) : value(v), upper_bound(upper) {}
- Bound(const Bound &b, int cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
- Bound(const Bound &b, const APInt &cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
-
- /// Test if Bound is an upper bound
- bool isUpperBound() const { return upper_bound; }
-
- /// Get the bitwidth of this bound
- int32_t getBitWidth() const { return value.getBitWidth(); }
-
- /// Creates a Bound incrementing the one received
- static Bound createIncrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value+1 : b.value-1,
- b.upper_bound);
- }
-
- /// Creates a Bound decrementing the one received
- static Bound createDecrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value-1 : b.value+1,
- b.upper_bound);
- }
-
- /// Test if two bounds are equal
- static bool eq(const Bound *a, const Bound *b) {
- if (!a || !b) return false;
-
- assert(a->isUpperBound() == b->isUpperBound());
- return a->value == b->value;
- }
-
- /// Test if val is less than or equal to Bound b
- static bool leq(APInt val, const Bound &b) {
- return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value);
- }
-
- /// Test if Bound a is less then or equal to Bound
- static bool leq(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.sle(b.value) :
- a.value.sge(b.value);
- }
-
- /// Test if Bound a is less then Bound b
- static bool lt(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.slt(b.value) :
- a.value.sgt(b.value);
- }
-
- /// Test if Bound b is greater then or equal val
- static bool geq(const Bound &b, APInt val) {
- return leq(val, b);
- }
-
- /// Test if Bound a is greater then or equal Bound b
- static bool geq(const Bound &a, const Bound &b) {
- return leq(b, a);
- }
-
- private:
- APInt value;
- bool upper_bound;
- };
-
- /// This class is used to store results some parts of the graph,
- /// so information does not need to be recalculated. The maximum false,
- /// minimum true and minimum reduced results are stored
- class MemoizedResultChart {
- public:
- MemoizedResultChart() {}
- MemoizedResultChart(const MemoizedResultChart &other) {
- if (other.max_false)
- max_false.reset(new Bound(*other.max_false));
- if (other.min_true)
- min_true.reset(new Bound(*other.min_true));
- if (other.min_reduced)
- min_reduced.reset(new Bound(*other.min_reduced));
- }
-
- /// Returns the max false
- const Bound *getFalse() const { return max_false.get(); }
-
- /// Returns the min true
- const Bound *getTrue() const { return min_true.get(); }
-
- /// Returns the min reduced
- const Bound *getReduced() const { return min_reduced.get(); }
-
- /// Return the stored result for this bound
- ProveResult getResult(const Bound &bound) const;
-
- /// Stores a false found
- void addFalse(const Bound &bound);
-
- /// Stores a true found
- void addTrue(const Bound &bound);
-
- /// Stores a Reduced found
- void addReduced(const Bound &bound);
-
- /// Clears redundant reduced
- /// If a min_true is smaller than a min_reduced then the min_reduced
- /// is unnecessary and then removed. It also works for min_reduced
- /// begin smaller than max_false.
- void clearRedundantReduced();
-
- void clear() {
- max_false.reset();
- min_true.reset();
- min_reduced.reset();
- }
-
- private:
- OwningPtr<Bound> max_false, min_true, min_reduced;
- };
-
- /// This class stores the result found for a node of the graph,
- /// so these results do not need to be recalculated, only searched for.
- class MemoizedResult {
- public:
- /// Test if there is true result stored from b to a
- /// that is less then the bound
- bool hasTrue(Value *b, const Bound &bound) const {
- const Bound *trueBound = map.lookup(b).getTrue();
- return trueBound && Bound::leq(*trueBound, bound);
- }
-
- /// Test if there is false result stored from b to a
- /// that is less then the bound
- bool hasFalse(Value *b, const Bound &bound) const {
- const Bound *falseBound = map.lookup(b).getFalse();
- return falseBound && Bound::leq(*falseBound, bound);
- }
-
- /// Test if there is reduced result stored from b to a
- /// that is less then the bound
- bool hasReduced(Value *b, const Bound &bound) const {
- const Bound *reducedBound = map.lookup(b).getReduced();
- return reducedBound && Bound::leq(*reducedBound, bound);
- }
-
- /// Returns the stored bound for b
- ProveResult getBoundResult(Value *b, const Bound &bound) {
- return map[b].getResult(bound);
- }
-
- /// Clears the map
- void clear() {
- DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
- DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
- for (; begin != end; ++begin) {
- begin->second.clear();
- }
- map.clear();
- }
-
- /// Stores the bound found
- void updateBound(Value *b, const Bound &bound, const ProveResult res);
-
- private:
- // Maps a nod in the graph with its results found.
- DenseMap<Value*, MemoizedResultChart> map;
- };
-
- /// This class represents an edge in the inequality graph used by the
- /// ABCD algorithm. An edge connects node v to node u with a value c if
- /// we could infer a constraint v <= u + c in the source program.
- class Edge {
- public:
- Edge(Value *V, APInt val, bool upper)
- : vertex(V), value(val), upper_bound(upper) {}
-
- Value *getVertex() const { return vertex; }
- const APInt &getValue() const { return value; }
- bool isUpperBound() const { return upper_bound; }
-
- private:
- Value *vertex;
- APInt value;
- bool upper_bound;
- };
-
- /// Weighted and Directed graph to represent constraints.
- /// There is one type of constraint, a <= b + X, which will generate an
- /// edge from b to a with weight X.
- class InequalityGraph {
- public:
-
- /// Adds an edge from V_from to V_to with weight value
- void addEdge(Value *V_from, Value *V_to, APInt value, bool upper);
-
- /// Test if there is a node V
- bool hasNode(Value *V) const { return graph.count(V); }
-
- /// Test if there is any edge from V in the upper direction
- bool hasEdge(Value *V, bool upper) const;
-
- /// Returns all edges pointed by vertex V
- SmallVector<Edge, 16> getEdges(Value *V) const {
- return graph.lookup(V);
- }
-
- /// Prints the graph in dot format.
- /// Blue edges represent upper bound and Red lower bound.
- void printGraph(raw_ostream &OS, Function &F) const {
- printHeader(OS, F);
- printBody(OS);
- printFooter(OS);
- }
-
- /// Clear the graph
- void clear() {
- graph.clear();
- }
-
- private:
- DenseMap<Value *, SmallVector<Edge, 16> > graph;
-
- /// Prints the header of the dot file
- void printHeader(raw_ostream &OS, Function &F) const;
-
- /// Prints the footer of the dot file
- void printFooter(raw_ostream &OS) const {
- OS << "}\n";
- }
-
- /// Prints the body of the dot file
- void printBody(raw_ostream &OS) const;
-
- /// Prints vertex source to the dot file
- void printVertex(raw_ostream &OS, Value *source) const;
-
- /// Prints the edge to the dot file
- void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const;
-
- void printName(raw_ostream &OS, Value *info) const;
- };
-
- /// Iterates through all BasicBlocks, if the Terminator Instruction
- /// uses an Comparator Instruction, all operands of this comparator
- /// are sent to be transformed to SSI. Only Instruction operands are
- /// transformed.
- void createSSI(Function &F);
-
- /// Creates the graphs for this function.
- /// It will look for all comparators used in branches, and create them.
- /// These comparators will create constraints for any instruction as an
- /// operand.
- void executeABCD(Function &F);
-
- /// Seeks redundancies in the comparator instruction CI.
- /// If the ABCD algorithm can prove that the comparator CI always
- /// takes one way, then the Terminator Instruction TI is substituted from
- /// a conditional branch to a unconditional one.
- /// This code basically receives a comparator, and verifies which kind of
- /// instruction it is. Depending on the kind of instruction, we use different
- /// strategies to prove its redundancy.
- void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Substitutes Terminator Instruction TI, that is a conditional branch,
- /// with one unconditional branch. Succ_edge determines if the new
- /// unconditional edge will be the first or second edge of the former TI
- /// instruction.
- void removeRedundancy(TerminatorInst *TI, bool Succ_edge);
-
- /// When an conditional branch is removed, the BasicBlock that is no longer
- /// reachable will have problems in phi functions. This method fixes these
- /// phis removing the former BasicBlock from the list of incoming BasicBlocks
- /// of all phis. In case the phi remains with no predecessor it will be
- /// marked to be removed later.
- void fixPhi(BasicBlock *BB, BasicBlock *Succ);
-
- /// Removes phis that have no predecessor
- void removePhis();
-
- /// Creates constraints for Instructions.
- /// If the constraint for this instruction has already been created
- /// nothing is done.
- void createConstraintInstruction(Instruction *I);
-
- /// Creates constraints for Binary Operators.
- /// It will create constraints only for addition and subtraction,
- /// the other binary operations are not treated by ABCD.
- /// For additions in the form a = b + X and a = X + b, where X is a constant,
- /// the constraint a <= b + X can be obtained. For this constraint, an edge
- /// a->b with weight X is added to the lower bound graph, and an edge
- /// b->a with weight -X is added to the upper bound graph.
- /// Only subtractions in the format a = b - X is used by ABCD.
- /// Edges are created using the same semantic as addition.
- void createConstraintBinaryOperator(BinaryOperator *BO);
-
- /// Creates constraints for Comparator Instructions.
- /// Only comparators that have any of the following operators
- /// are used to create constraints: >=, >, <=, <. And only if
- /// at least one operand is an Instruction. In a Comparator Instruction
- /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where
- /// t and f represent sigma for operands in true and false branches. The
- /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
- /// b_f <= b. There are two more constraints that depend on the operator.
- /// For the operator <= : a_t <= b_t and b_f <= a_f-1
- /// For the operator < : a_t <= b_t-1 and b_f <= a_f
- /// For the operator >= : b_t <= a_t and a_f <= b_f-1
- /// For the operator > : b_t <= a_t-1 and a_f <= b_f
- void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Creates constraints for PHI nodes.
- /// In a PHI node a = phi(b,c) we can create the constraint
- /// a<= max(b,c). With this constraint there will be the edges,
- /// b->a and c->a with weight 0 in the lower bound graph, and the edges
- /// a->b and a->c with weight 0 in the upper bound graph.
- void createConstraintPHINode(PHINode *PN);
-
- /// Given a binary operator, we are only interest in the case
- /// that one operand is an Instruction and the other is a ConstantInt. In
- /// this case the method returns true, otherwise false. It also obtains the
- /// Instruction and ConstantInt from the BinaryOperator and returns it.
- bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2);
-
- /// This method creates a constraint between a Sigma and an Instruction.
- /// These constraints are created as soon as we find a comparator that uses a
- /// SSI variable.
- void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f);
-
- /// If PN_op1 and PN_o2 are different from NULL, create a constraint
- /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
- /// with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value);
-
- /// Returns the sigma representing the Instruction I in BasicBlock BB.
- /// Returns NULL in case there is no sigma for this Instruction in this
- /// Basic Block. This methods assume that sigmas are the first instructions
- /// in a block, and that there can be only two sigmas in a block. So it will
- /// only look on the first two instructions of BasicBlock BB.
- PHINode *findSigma(BasicBlock *BB, Instruction *I);
-
- /// Original ABCD algorithm to prove redundant checks.
- /// This implementation works on any kind of inequality branch.
- bool demandProve(Value *a, Value *b, int c, bool upper_bound);
-
- /// Prove that distance between b and a is <= bound
- ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level);
-
- /// Updates the distance value for a and b
- void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level,
- meet_function meet);
-
- InequalityGraph inequality_graph;
- MemoizedResult mem_result;
- DenseMap<Value*, const Bound*> active;
- SmallPtrSet<Value*, 16> created;
- SmallVector<PHINode *, 16> phis_to_remove;
-};
-
-} // end anonymous namespace.
-
-char ABCD::ID = 0;
-static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand");
-
-
-bool ABCD::runOnFunction(Function &F) {
- modified = false;
- createSSI(F);
- executeABCD(F);
- DEBUG(inequality_graph.printGraph(dbgs(), F));
- removePhis();
-
- inequality_graph.clear();
- mem_result.clear();
- active.clear();
- created.clear();
- phis_to_remove.clear();
- return modified;
-}
-
-/// Iterates through all BasicBlocks, if the Terminator Instruction
-/// uses an Comparator Instruction, all operands of this comparator
-/// are sent to be transformed to SSI. Only Instruction operands are
-/// transformed.
-void ABCD::createSSI(Function &F) {
- SSI *ssi = &getAnalysis<SSI>();
-
- SmallVector<Instruction *, 16> Insts;
-
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) {
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) {
- modified = true; // XXX: but yet createSSI might do nothing
- Insts.push_back(I);
- }
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) {
- modified = true;
- Insts.push_back(I);
- }
- }
- }
- ssi->createSSI(Insts);
-}
-
-/// Creates the graphs for this function.
-/// It will look for all comparators used in branches, and create them.
-/// These comparators will create constraints for any instruction as an
-/// operand.
-void ABCD::executeABCD(Function &F) {
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
- if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy())
- continue;
-
- createConstraintCmpInst(ICI, TI);
- seekRedundancy(ICI, TI);
- }
-}
-
-/// Seeks redundancies in the comparator instruction CI.
-/// If the ABCD algorithm can prove that the comparator CI always
-/// takes one way, then the Terminator Instruction TI is substituted from
-/// a conditional branch to a unconditional one.
-/// This code basically receives a comparator, and verifies which kind of
-/// instruction it is. Depending on the kind of instruction, we use different
-/// strategies to prove its redundancy.
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) {
- CmpInst::Predicate Pred = ICI->getPredicate();
-
- Value *source, *dest;
- int distance1, distance2;
- bool upper;
-
- switch(Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- upper = false;
- distance1 = 1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- upper = false;
- distance1 = 0;
- distance2 = -1;
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- upper = true;
- distance1 = -1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- upper = true;
- distance1 = 0;
- distance2 = 1;
- break;
-
- default:
- return;
- }
-
- ++NumBranchTested;
- source = ICI->getOperand(0);
- dest = ICI->getOperand(1);
- if (demandProve(dest, source, distance1, upper)) {
- removeRedundancy(TI, true);
- } else if (demandProve(dest, source, distance2, !upper)) {
- removeRedundancy(TI, false);
- }
-}
-
-/// Substitutes Terminator Instruction TI, that is a conditional branch,
-/// with one unconditional branch. Succ_edge determines if the new
-/// unconditional edge will be the first or second edge of the former TI
-/// instruction.
-void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) {
- BasicBlock *Succ;
- if (Succ_edge) {
- Succ = TI->getSuccessor(0);
- fixPhi(TI->getParent(), TI->getSuccessor(1));
- } else {
- Succ = TI->getSuccessor(1);
- fixPhi(TI->getParent(), TI->getSuccessor(0));
- }
-
- BranchInst::Create(Succ, TI);
- TI->eraseFromParent(); // XXX: invoke
- ++NumBranchRemoved;
- modified = true;
-}
-
-/// When an conditional branch is removed, the BasicBlock that is no longer
-/// reachable will have problems in phi functions. This method fixes these
-/// phis removing the former BasicBlock from the list of incoming BasicBlocks
-/// of all phis. In case the phi remains with no predecessor it will be
-/// marked to be removed later.
-void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) {
- BasicBlock::iterator begin = Succ->begin();
- while (PHINode *PN = dyn_cast<PHINode>(begin++)) {
- PN->removeIncomingValue(BB, false);
- if (PN->getNumIncomingValues() == 0)
- phis_to_remove.push_back(PN);
- }
-}
-
-/// Removes phis that have no predecessor
-void ABCD::removePhis() {
- for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) {
- PHINode *PN = phis_to_remove[i];
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- PN->eraseFromParent();
- }
-}
-
-/// Creates constraints for Instructions.
-/// If the constraint for this instruction has already been created
-/// nothing is done.
-void ABCD::createConstraintInstruction(Instruction *I) {
- // Test if this instruction has not been created before
- if (created.insert(I)) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- createConstraintBinaryOperator(BO);
- } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
- createConstraintPHINode(PN);
- }
- }
-}
-
-/// Creates constraints for Binary Operators.
-/// It will create constraints only for addition and subtraction,
-/// the other binary operations are not treated by ABCD.
-/// For additions in the form a = b + X and a = X + b, where X is a constant,
-/// the constraint a <= b + X can be obtained. For this constraint, an edge
-/// a->b with weight X is added to the lower bound graph, and an edge
-/// b->a with weight -X is added to the upper bound graph.
-/// Only subtractions in the format a = b - X is used by ABCD.
-/// Edges are created using the same semantic as addition.
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) {
- Instruction *I1 = NULL, *I2 = NULL;
- ConstantInt *CI1 = NULL, *CI2 = NULL;
-
- // Test if an operand is an Instruction and the other is a Constant
- if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2))
- return;
-
- Instruction *I = 0;
- APInt value;
-
- switch (BO->getOpcode()) {
- case Instruction::Add:
- if (I1) {
- I = I1;
- value = CI2->getValue();
- } else if (I2) {
- I = I2;
- value = CI1->getValue();
- }
- break;
-
- case Instruction::Sub:
- // Instructions like a = X-b, where X is a constant are not represented
- // in the graph.
- if (!I1)
- return;
-
- I = I1;
- value = -CI2->getValue();
- break;
-
- default:
- return;
- }
-
- inequality_graph.addEdge(I, BO, value, true);
- inequality_graph.addEdge(BO, I, -value, false);
- createConstraintInstruction(I);
-}
-
-/// Given a binary operator, we are only interest in the case
-/// that one operand is an Instruction and the other is a ConstantInt. In
-/// this case the method returns true, otherwise false. It also obtains the
-/// Instruction and ConstantInt from the BinaryOperator and returns it.
-bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2) {
- Value *op1 = BO->getOperand(0);
- Value *op2 = BO->getOperand(1);
-
- if ((*I1 = dyn_cast<Instruction>(op1))) {
- if ((*C2 = dyn_cast<ConstantInt>(op2)))
- return true; // First is Instruction and second ConstantInt
-
- return false; // Both are Instruction
- } else {
- if ((*C1 = dyn_cast<ConstantInt>(op1)) &&
- (*I2 = dyn_cast<Instruction>(op2)))
- return true; // First is ConstantInt and second Instruction
-
- return false; // Both are not Instruction
- }
-}
-
-/// Creates constraints for Comparator Instructions.
-/// Only comparators that have any of the following operators
-/// are used to create constraints: >=, >, <=, <. And only if
-/// at least one operand is an Instruction. In a Comparator Instruction
-/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where
-/// t and f represent sigma for operands in true and false branches. The
-/// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
-/// b_f <= b. There are two more constraints that depend on the operator.
-/// For the operator <= : a_t <= b_t and b_f <= a_f-1
-/// For the operator < : a_t <= b_t-1 and b_f <= a_f
-/// For the operator >= : b_t <= a_t and a_f <= b_f-1
-/// For the operator > : b_t <= a_t-1 and a_f <= b_f
-void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
- Value *V_op1 = ICI->getOperand(0);
- Value *V_op2 = ICI->getOperand(1);
-
- if (!V_op1->getType()->isIntegerTy())
- return;
-
- Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
- Instruction *I_op2 = dyn_cast<Instruction>(V_op2);
-
- // Test if at least one operand is an Instruction
- if (!I_op1 && !I_op2)
- return;
-
- BasicBlock *BB_succ_t = TI->getSuccessor(0);
- BasicBlock *BB_succ_f = TI->getSuccessor(1);
-
- PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL,
- *SIG_op2_t = NULL, *SIG_op2_f = NULL;
-
- createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f);
- createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f);
-
- int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth();
- APInt MinusOne = APInt::getAllOnesValue(width);
- APInt Zero = APInt::getNullValue(width);
-
- CmpInst::Predicate Pred = ICI->getPredicate();
- ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
- ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
- switch (Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
- break;
-
- default:
- break;
- }
-
- if (I_op1)
- createConstraintInstruction(I_op1);
- if (I_op2)
- createConstraintInstruction(I_op2);
-}
-
-/// Creates constraints for PHI nodes.
-/// In a PHI node a = phi(b,c) we can create the constraint
-/// a<= max(b,c). With this constraint there will be the edges,
-/// b->a and c->a with weight 0 in the lower bound graph, and the edges
-/// a->b and a->c with weight 0 in the upper bound graph.
-void ABCD::createConstraintPHINode(PHINode *PN) {
- // FIXME: We really want to disallow sigma nodes, but I don't know the best
- // way to detect the other than this.
- if (PN->getNumOperands() == 2) return;
-
- int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = PN->getIncomingValue(i);
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- createConstraintInstruction(I);
- }
- inequality_graph.addEdge(V, PN, APInt(width, 0), true);
- inequality_graph.addEdge(V, PN, APInt(width, 0), false);
- }
-}
-
-/// This method creates a constraint between a Sigma and an Instruction.
-/// These constraints are created as soon as we find a comparator that uses a
-/// SSI variable.
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f) {
- *SIG_op_t = findSigma(BB_succ_t, I_op);
- *SIG_op_f = findSigma(BB_succ_f, I_op);
-
- if (*SIG_op_t) {
- int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- }
- if (*SIG_op_f) {
- int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- }
-}
-
-/// If PN_op1 and PN_o2 are different from NULL, create a constraint
-/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
-/// with the respective V_op#, if V_op# is a ConstantInt.
-void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value) {
- if (SIG_op1 && SIG_op2) {
- inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
- } else if (SIG_op1 && V_op2) {
- inequality_graph.addEdge(V_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
- } else if (SIG_op2 && V_op1) {
- inequality_graph.addEdge(SIG_op2, V_op1, value, true);
- inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
- }
-}
-
-/// Returns the sigma representing the Instruction I in BasicBlock BB.
-/// Returns NULL in case there is no sigma for this Instruction in this
-/// Basic Block. This methods assume that sigmas are the first instructions
-/// in a block, and that there can be only two sigmas in a block. So it will
-/// only look on the first two instructions of BasicBlock BB.
-PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) {
- // BB has more than one predecessor, BB cannot have sigmas.
- if (I == NULL || BB->getSinglePredecessor() == NULL)
- return NULL;
-
- BasicBlock::iterator begin = BB->begin();
- BasicBlock::iterator end = BB->end();
-
- for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) {
- Instruction *I_succ = begin;
- if (PHINode *PN = dyn_cast<PHINode>(I_succ))
- if (PN->getIncomingValue(0) == I)
- return PN;
- }
-
- return NULL;
-}
-
-/// Original ABCD algorithm to prove redundant checks.
-/// This implementation works on any kind of inequality branch.
-bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
- int32_t width = cast<IntegerType>(a->getType())->getBitWidth();
- Bound bound(APInt(width, c), upper_bound);
-
- mem_result.clear();
- active.clear();
-
- ProveResult res = prove(a, b, bound, 0);
- return res != False;
-}
-
-/// Prove that distance between b and a is <= bound
-ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound,
- unsigned level) {
- // if (C[b-a<=e] == True for some e <= bound
- // Same or stronger difference was already proven
- if (mem_result.hasTrue(b, bound))
- return True;
-
- // if (C[b-a<=e] == False for some e >= bound
- // Same or weaker difference was already disproved
- if (mem_result.hasFalse(b, bound))
- return False;
-
- // if (C[b-a<=e] == Reduced for some e <= bound
- // b is on a cycle that was reduced for same or stronger difference
- if (mem_result.hasReduced(b, bound))
- return Reduced;
-
- // traversal reached the source vertex
- if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true)))
- return True;
-
- // if b has no predecessor then fail
- if (!inequality_graph.hasEdge(b, bound.isUpperBound()))
- return False;
-
- // a cycle was encountered
- if (active.count(b)) {
- if (Bound::leq(*active.lookup(b), bound))
- return Reduced; // a "harmless" cycle
-
- return False; // an amplifying cycle
- }
-
- active[b] = &bound;
- PHINode *PN = dyn_cast<PHINode>(b);
-
- // Test if a Value is a Phi. If it is a PHINode with more than 1 incoming
- // value, then it is a phi, if it has 1 incoming value it is a sigma.
- if (PN && PN->getNumIncomingValues() > 1)
- updateMemDistance(a, b, bound, level, min);
- else
- updateMemDistance(a, b, bound, level, max);
-
- active.erase(b);
-
- ABCD::ProveResult res = mem_result.getBoundResult(b, bound);
- return res;
-}
-
-/// Updates the distance value for a and b
-void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound,
- unsigned level, meet_function meet) {
- ABCD::ProveResult res = (meet == max) ? False : True;
-
- SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b);
- SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end();
-
- for (; begin != end ; ++begin) {
- if (((res >= Reduced) && (meet == max)) ||
- ((res == False) && (meet == min))) {
- break;
- }
- const Edge &in = *begin;
- if (in.isUpperBound() == bound.isUpperBound()) {
- Value *succ = in.getVertex();
- res = meet(res, prove(a, succ, Bound(bound, in.getValue()),
- level+1));
- }
- }
-
- mem_result.updateBound(b, bound, res);
-}
-
-/// Return the stored result for this bound
-ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{
- if (max_false && Bound::leq(bound, *max_false))
- return False;
- if (min_true && Bound::leq(*min_true, bound))
- return True;
- if (min_reduced && Bound::leq(*min_reduced, bound))
- return Reduced;
- return False;
-}
-
-/// Stores a false found
-void ABCD::MemoizedResultChart::addFalse(const Bound &bound) {
- if (!max_false || Bound::leq(*max_false, bound))
- max_false.reset(new Bound(bound));
-
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced)));
- if (Bound::eq(max_false.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a true found
-void ABCD::MemoizedResultChart::addTrue(const Bound &bound) {
- if (!min_true || Bound::leq(bound, *min_true))
- min_true.reset(new Bound(bound));
-
- if (Bound::eq(min_true.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced)));
- if (Bound::eq(min_true.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a Reduced found
-void ABCD::MemoizedResultChart::addReduced(const Bound &bound) {
- if (!min_reduced || Bound::leq(bound, *min_reduced))
- min_reduced.reset(new Bound(bound));
-
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
-}
-
-/// Clears redundant reduced
-/// If a min_true is smaller than a min_reduced then the min_reduced
-/// is unnecessary and then removed. It also works for min_reduced
-/// begin smaller than max_false.
-void ABCD::MemoizedResultChart::clearRedundantReduced() {
- if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced))
- min_reduced.reset();
- if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false))
- min_reduced.reset();
-}
-
-/// Stores the bound found
-void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound,
- const ProveResult res) {
- if (res == False) {
- map[b].addFalse(bound);
- } else if (res == True) {
- map[b].addTrue(bound);
- } else {
- map[b].addReduced(bound);
- }
-}
-
-/// Adds an edge from V_from to V_to with weight value
-void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from,
- APInt value, bool upper) {
- assert(V_from->getType() == V_to->getType());
- assert(cast<IntegerType>(V_from->getType())->getBitWidth() ==
- value.getBitWidth());
-
- graph[V_from].push_back(Edge(V_to, value, upper));
-}
-
-/// Test if there is any edge from V in the upper direction
-bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const {
- SmallVector<Edge, 16> it = graph.lookup(V);
-
- SmallVector<Edge, 16>::iterator begin = it.begin();
- SmallVector<Edge, 16>::iterator end = it.end();
- for (; begin != end; ++begin) {
- if (begin->isUpperBound() == upper) {
- return true;
- }
- }
- return false;
-}
-
-/// Prints the header of the dot file
-void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
- OS << "digraph dotgraph {\n";
- OS << "label=\"Inequality Graph for \'";
- OS << F.getNameStr() << "\' function\";\n";
- OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n";
-}
-
-/// Prints the body of the dot file
-void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin =
- graph.begin(), end = graph.end();
-
- for (; begin != end ; ++begin) {
- SmallVector<Edge, 16>::const_iterator begin_par =
- begin->second.begin(), end_par = begin->second.end();
- Value *source = begin->first;
-
- printVertex(OS, source);
-
- for (; begin_par != end_par ; ++begin_par) {
- const Edge &edge = *begin_par;
- printEdge(OS, source, edge);
- }
- }
-}
-
-/// Prints vertex source to the dot file
-///
-void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const {
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " [label=\"{";
- printName(OS, source);
- OS << "}\"];\n";
-}
-
-/// Prints the edge to the dot file
-void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source,
- const Edge &edge) const {
- Value *dest = edge.getVertex();
- APInt value = edge.getValue();
- bool upper = edge.isUpperBound();
-
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " -> ";
- OS << "\"";
- printName(OS, dest);
- OS << "\"";
- OS << " [label=\"" << value << "\"";
- if (upper) {
- OS << "color=\"blue\"";
- } else {
- OS << "color=\"red\"";
- }
- OS << "];\n";
-}
-
-void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) {
- OS << *CI;
- } else {
- if (!info->hasName()) {
- info->setName("V");
- }
- OS << info->getNameStr();
- }
-}
-
-/// createABCDPass - The public interface to this file...
-FunctionPass *llvm::createABCDPass() {
- return new ABCD();
-}
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 2d19467ce746..ada086e9db76 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
struct ADCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(&ID) {}
+ ADCE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function& F);
@@ -45,7 +45,7 @@ namespace {
}
char ADCE::ID = 0;
-static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination");
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index 54533f50405f..b144678c6a0e 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -41,7 +41,7 @@ STATISTIC(NumMoved, "Number of basic blocks moved");
namespace {
struct BlockPlacement : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(&ID) {}
+ BlockPlacement() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -74,8 +74,8 @@ namespace {
}
char BlockPlacement::ID = 0;
-static RegisterPass<BlockPlacement>
-X("block-placement", "Profile Guided Basic Block Placement");
+INITIALIZE_PASS(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false);
FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 1a3b10cc9baa..b7598eace536 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,9 +1,9 @@
add_llvm_library(LLVMScalarOpts
- ABCD.cpp
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
ConstantProp.cpp
+ CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
GEPSplitter.cpp
@@ -17,6 +17,7 @@ add_llvm_library(LLVMScalarOpts
LoopStrengthReduce.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
+ LowerAtomic.cpp
MemCpyOptimizer.cpp
Reassociate.cpp
Reg2Mem.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 272066c8c0c4..e07b761e589c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -41,6 +42,11 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+static cl::opt<bool>
+CriticalEdgeSplit("cgp-critical-edge-splitting",
+ cl::desc("Split critical edges during codegen prepare"),
+ cl::init(true), cl::Hidden);
+
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -54,7 +60,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(&ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {}
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -82,8 +88,8 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-static RegisterPass<CodeGenPrepare> X("codegenprepare",
- "Optimize for code generation");
+INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false);
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
@@ -427,9 +433,9 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
- if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
@@ -548,9 +554,9 @@ protected:
CI->eraseFromParent();
}
bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
- - CallInst::ArgOffset)))
- return SizeCI->isAllOnesValue();
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+ return SizeCI->isAllOnesValue();
return false;
}
};
@@ -891,12 +897,14 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
// Split all critical edges where the dest block has a PHI.
- TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *SuccBB = BBTI->getSuccessor(i);
- if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, BackEdges, this);
+ if (CriticalEdgeSplit) {
+ TerminatorInst *BBTI = BB.getTerminator();
+ if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
}
}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index ea208135739d..a0ea369d0cad 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -34,7 +34,7 @@ STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
struct ConstantPropagation : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ConstantPropagation() : FunctionPass(&ID) {}
+ ConstantPropagation() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -45,8 +45,8 @@ namespace {
}
char ConstantPropagation::ID = 0;
-static RegisterPass<ConstantPropagation>
-X("constprop", "Simple constant propagation");
+INITIALIZE_PASS(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false);
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
new file mode 100644
index 000000000000..0d4e45de3466
--- /dev/null
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,200 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPhis, "Number of phis propagated");
+STATISTIC(NumSelects, "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps, "Number of comparisons propagated");
+
+namespace {
+ class CorrelatedValuePropagation : public FunctionPass {
+ LazyValueInfo *LVI;
+
+ bool processSelect(SelectInst *SI);
+ bool processPHI(PHINode *P);
+ bool processMemAccess(Instruction *I);
+ bool processCmp(CmpInst *C);
+
+ public:
+ static char ID;
+ CorrelatedValuePropagation(): FunctionPass(ID) { }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ }
+ };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false);
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+ return new CorrelatedValuePropagation();
+}
+
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+ if (S->getType()->isVectorTy()) return false;
+ if (isa<Constant>(S->getOperand(0))) return false;
+
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ if (!C) return false;
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return false;
+
+ S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+ S->eraseFromParent();
+
+ ++NumSelects;
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+ bool Changed = false;
+
+ BasicBlock *BB = P->getParent();
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ Value *Incoming = P->getIncomingValue(i);
+ if (isa<Constant>(Incoming)) continue;
+
+ Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
+ P->getIncomingBlock(i),
+ BB);
+ if (!C) continue;
+
+ P->setIncomingValue(i, C);
+ Changed = true;
+ }
+
+ if (Value *ConstVal = P->hasConstantValue()) {
+ P->replaceAllUsesWith(ConstVal);
+ P->eraseFromParent();
+ Changed = true;
+ }
+
+ ++NumPhis;
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+ Value *Pointer = 0;
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ Pointer = L->getPointerOperand();
+ else
+ Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+ if (isa<Constant>(Pointer)) return false;
+
+ Constant *C = LVI->getConstant(Pointer, I->getParent());
+ if (!C) return false;
+
+ ++NumMemAccess;
+ I->replaceUsesOfWith(Pointer, C);
+ return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out. Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge. If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+ Value *Op0 = C->getOperand(0);
+ if (isa<Instruction>(Op0) &&
+ cast<Instruction>(Op0)->getParent() == C->getParent())
+ return false;
+
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return false;
+
+ pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+ if (PI == PE) return false;
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Result == LazyValueInfo::Unknown) return false;
+
+ ++PI;
+ while (PI != PE) {
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Res != Result) return false;
+ ++PI;
+ }
+
+ ++NumCmps;
+
+ if (Result == LazyValueInfo::True)
+ C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+ else
+ C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+ C->eraseFromParent();
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ bool FnChanged = false;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool BBChanged = false;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *II = BI++;
+ switch (II->getOpcode()) {
+ case Instruction::Select:
+ BBChanged |= processSelect(cast<SelectInst>(II));
+ break;
+ case Instruction::PHI:
+ BBChanged |= processPHI(cast<PHINode>(II));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ BBChanged |= processCmp(cast<CmpInst>(II));
+ break;
+ case Instruction::Load:
+ case Instruction::Store:
+ BBChanged |= processMemAccess(II);
+ break;
+ }
+ }
+
+ // Propagating correlated values might leave cruft around.
+ // Try to clean it up before we continue.
+ if (BBChanged)
+ SimplifyInstructionsInBlock(FI);
+
+ FnChanged |= BBChanged;
+ }
+
+ return FnChanged;
+}
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 39940c35da5d..87ea8038356a 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -35,7 +35,7 @@ namespace {
//
struct DeadInstElimination : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : BasicBlockPass(&ID) {}
+ DeadInstElimination() : BasicBlockPass(ID) {}
virtual bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -56,8 +56,8 @@ namespace {
}
char DeadInstElimination::ID = 0;
-static RegisterPass<DeadInstElimination>
-X("die", "Dead Instruction Elimination");
+INITIALIZE_PASS(DeadInstElimination, "die",
+ "Dead Instruction Elimination", false, false);
Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
@@ -70,7 +70,7 @@ namespace {
//
struct DCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- DCE() : FunctionPass(&ID) {}
+ DCE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -81,7 +81,7 @@ namespace {
}
char DCE::ID = 0;
-static RegisterPass<DCE> Y("dce", "Dead Code Elimination");
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
bool DCE::runOnFunction(Function &F) {
// Start out with all of the instructions in the worklist...
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e047e4ffa151..c8fd9d9fa556 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -40,7 +40,7 @@ namespace {
TargetData *TD;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(&ID) {}
+ DSE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
bool Changed = false;
@@ -82,7 +82,7 @@ namespace {
}
char DSE::ID = 0;
-static RegisterPass<DSE> X("dse", "Dead Store Elimination");
+INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
@@ -401,10 +401,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
continue;
- } else if (CallSite::get(BBI).getInstruction() != 0) {
+ } else if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't
// be undeadifying any of our pointers.
- CallSite CS = CallSite::get(BBI);
if (AA.doesNotAccessMemory(CS))
continue;
diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp
index 610a41dae44b..53dd06d24bb5 100644
--- a/lib/Transforms/Scalar/GEPSplitter.cpp
+++ b/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -27,13 +27,13 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
public:
static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(&ID) {}
+ explicit GEPSplitter() : FunctionPass(ID) {}
};
}
char GEPSplitter::ID = 0;
-static RegisterPass<GEPSplitter> X("split-geps",
- "split complex GEPs into simple GEPs");
+INITIALIZE_PASS(GEPSplitter, "split-geps",
+ "split complex GEPs into simple GEPs", false, false);
FunctionPass *llvm::createGEPSplitterPass() {
return new GEPSplitter();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 88b67768fa5d..c62ce1f27f64 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -165,7 +165,6 @@ namespace {
Expression create_expression(CastInst* C);
Expression create_expression(GetElementPtrInst* G);
Expression create_expression(CallInst* C);
- Expression create_expression(Constant* C);
Expression create_expression(ExtractValueInst* C);
Expression create_expression(InsertValueInst* C);
@@ -665,7 +664,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
+ : FunctionPass(ID), NoLoads(noloads), MD(0) { }
private:
bool NoLoads;
@@ -716,8 +715,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
}
-static RegisterPass<GVN> X("gvn",
- "Global Value Numbering");
+INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
@@ -735,7 +733,7 @@ static bool isSafeReplacement(PHINode* p, Instruction *inst) {
for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
UI != E; ++UI)
- if (PHINode* use_phi = dyn_cast<PHINode>(UI))
+ if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
if (use_phi->getParent() == inst->getParent())
return false;
@@ -1312,7 +1310,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
- SSAUpdate.Initialize(LI);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
const Type *LoadTy = LI->getType();
@@ -2112,6 +2110,11 @@ bool GVN::performPRE(Function &F) {
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
+
+ // We don't currently value number ANY inline asm calls.
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
uint32_t ValNo = VN.lookup(CurInst);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b5c9dd881df8..af2eafc47cbf 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -77,7 +77,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(&ID) {}
+ IndVarSimplify() : LoopPass(ID) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -102,7 +102,7 @@ namespace {
void RewriteNonIntegerIVs(Loop *L);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- Value *IndVar,
+ PHINode *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter);
@@ -117,8 +117,8 @@ namespace {
}
char IndVarSimplify::ID = 0;
-static RegisterPass<IndVarSimplify>
-X("indvars", "Canonicalize Induction Variables");
+INITIALIZE_PASS(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false);
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
@@ -131,7 +131,7 @@ Pass *llvm::createIndVarSimplifyPass() {
/// is actually a much broader range than just linear tests.
ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
- Value *IndVar,
+ PHINode *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter) {
@@ -181,7 +181,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- CmpIndVar = L->getCanonicalInductionVariableIncrement();
+ CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock);
} else {
// We have to use the preincremented value...
RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
@@ -534,7 +534,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that we know the largest of the induction variable expressions
// in this loop, insert a canonical induction variable of the largest size.
- Value *IndVar = 0;
+ PHINode *IndVar = 0;
if (NeedCannIV) {
// Check to see if the loop already has any canonical-looking induction
// variables. If any are present and wider than the planned canonical
@@ -862,9 +862,9 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
Value::use_iterator IncrUse = Incr->use_begin();
- Instruction *U1 = cast<Instruction>(IncrUse++);
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
if (IncrUse == Incr->use_end()) return;
- Instruction *U2 = cast<Instruction>(IncrUse++);
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
if (IncrUse != Incr->use_end()) return;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index edce14cd92ea..104d5aecbdd3 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -45,7 +46,10 @@ Threshold("jump-threading-threshold",
// Turn on use of LazyValueInfo.
static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+EnableLVI("enable-jump-threading-lvi",
+ cl::desc("Use LVI for jump threading"),
+ cl::init(true),
+ cl::ReallyHidden);
@@ -74,15 +78,32 @@ namespace {
#else
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(&ID) {}
+ JumpThreading() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (EnableLVI)
+ if (EnableLVI) {
AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
+ }
}
void FindLoopHeaders(Function &F);
@@ -111,8 +132,8 @@ namespace {
}
char JumpThreading::ID = 0;
-static RegisterPass<JumpThreading>
-X("jump-threading", "Jump Threading");
+INITIALIZE_PASS(JumpThreading, "jump-threading",
+ "Jump Threading", false, false);
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -144,6 +165,7 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
+ if (LVI) LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -164,6 +186,11 @@ bool JumpThreading::runOnFunction(Function &F) {
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
BasicBlock *Succ = BI->getSuccessor(0);
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ if (LVI) LVI->eraseBlock(BB);
if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
// If we deleted BB and BB was the header of a loop, then the
@@ -251,6 +278,17 @@ void JumpThreading::FindLoopHeaders(Function &F) {
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
+// Helper method for ComputeValueKnownInPredecessors. If Value is a
+// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > &Result,
+ Constant *Value, BasicBlock* BB){
+ if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
+ Result.push_back(std::make_pair(FoldedCInt, BB));
+ else if (isa<UndefValue>(Value))
+ Result.push_back(std::make_pair((ConstantInt*)0, BB));
+}
+
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt in any of our
/// predecessors. If so, return the known list of value and pred BB in the
@@ -260,12 +298,24 @@ void JumpThreading::FindLoopHeaders(Function &F) {
///
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII help to remove this pair from the recursion set once the recursion
+ // stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
// If V is a constantint, then it is known in all predecessors.
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
ConstantInt *CI = dyn_cast<ConstantInt>(V);
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(CI, *PI));
+
return true;
}
@@ -313,8 +363,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ } else if (LVI) {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ // LVI returns null is no value could be determined.
+ if (!CI) continue;
+ PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
}
}
+
return !Result.empty();
}
@@ -338,29 +395,26 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
else
InterestingVal = ConstantInt::getFalse(I->getContext());
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
Result.push_back(LHSVals[i]);
Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- bool HasValue = false;
- for (unsigned r = 0, e = Result.size(); r != e; ++r)
- if (Result[r].second == RHSVals[i].second) {
- HasValue = true;
- break;
- }
-
- if (!HasValue) {
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
Result.push_back(RHSVals[i]);
Result.back().first = InterestingVal;
}
}
+
return !Result.empty();
}
@@ -377,8 +431,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
if (Result[i].first)
Result[i].first =
cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+
return true;
}
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(BO->getType()));
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+ }
+
+ return !Result.empty();
}
// Handle compare with phi operand, where the PHI is defined in this block.
@@ -405,10 +478,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
- if (isa<UndefValue>(Res))
- Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
- Result.push_back(std::make_pair(CI, PredBB));
+ if (Constant *ConstRes = dyn_cast<Constant>(Res))
+ PushConstantIntOrUndef(Result, ConstRes, PredBB);
}
return !Result.empty();
@@ -418,28 +489,59 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isIntegerTy() && // Not vector compare.
- (!isa<Instruction>(Cmp->getOperand(0)) ||
- cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
- Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+ Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- LazyValueInfo::Tristate
- Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, P, BB);
- if (Res == LazyValueInfo::Unknown)
- continue;
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ }
- Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ return !Result.empty();
}
-
- return !Result.empty();
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(CmpConst->getType()));
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (LVI) {
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
+ if (CInt) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CInt, *PI));
}
+
+ return !Result.empty();
}
+
return false;
}
@@ -490,6 +592,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ if (LVI) LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
@@ -603,6 +706,44 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
}
+
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Used LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge(
+ CondCmp->getPredicate(),
+ CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
+ }
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -1020,6 +1161,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
return false;
+
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1314,6 +1456,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< ", across block:\n "
<< *BB << "\n");
+ if (LVI)
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
@@ -1383,7 +1528,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
@@ -1538,7 +1683,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 73473952912e..2ef85446bd9b 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -26,8 +26,7 @@
// pointer. There are no calls in the loop which mod/ref the pointer.
// If these conditions are true, we can promote the loads and stores in the
// loop of the pointer to use a temporary alloca'd variable. We then use
-// the mem2reg functionality to construct the appropriate SSA form for the
-// variable.
+// the SSAUpdater to construct the appropriate SSA form for the value.
//
//===----------------------------------------------------------------------===//
@@ -37,14 +36,15 @@
#include "llvm/DerivedTypes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -66,7 +66,7 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(&ID) {}
+ LICM() : LoopPass(ID) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -75,39 +75,31 @@ namespace {
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg)
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominanceFrontier>();
AU.addPreservedID(LoopSimplifyID);
}
bool doFinalization() {
- // Free the values stored in the map
- for (std::map<Loop *, AliasSetTracker *>::iterator
- I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I)
- delete I->second;
-
- LoopToAliasMap.clear();
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
return false;
}
private:
- // Various analyses that we use...
AliasAnalysis *AA; // Current AliasAnalysis information
LoopInfo *LI; // Current LoopInfo
- DominatorTree *DT; // Dominator Tree for the current Loop...
- DominanceFrontier *DF; // Current Dominance Frontier
+ DominatorTree *DT; // Dominator Tree for the current Loop.
- // State that is updated as we process loops
+ // State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
- std::map<Loop *, AliasSetTracker *> LoopToAliasMap;
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
@@ -204,25 +196,12 @@ namespace {
bool isLoopInvariantInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- /// PromoteValuesInLoop - Look at the stores in the loop and promote as many
- /// to scalars as we can.
- ///
- void PromoteValuesInLoop();
-
- /// FindPromotableValuesInLoop - Check the current loop for stores to
- /// definite pointers, which are not loaded and stored through may aliases.
- /// If these are found, create an alloca for the value, add it to the
- /// PromotedValues list, and keep track of the mapping from value to
- /// alloca...
- ///
- void FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &Val2AlMap);
+ void PromoteAliasSet(AliasSet &AS);
};
}
char LICM::ID = 0;
-static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion");
+INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -236,19 +215,23 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- DF = &getAnalysis<DominanceFrontier>();
DT = &getAnalysis<DominatorTree>();
CurAST = new AliasSetTracker(*AA);
- // Collect Alias info from subloops
+ // Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
LoopItr != LoopItrE; ++LoopItr) {
Loop *InnerL = *LoopItr;
- AliasSetTracker *InnerAST = LoopToAliasMap[InnerL];
- assert (InnerAST && "Where is my AST?");
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
// What if InnerLoop was modified by other passes ?
CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
}
CurLoop = L;
@@ -263,7 +246,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops...
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -283,15 +266,24 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
- // memory references to scalars that we can...
- if (!DisablePromotion && Preheader && L->hasDedicatedExits())
- PromoteValuesInLoop();
-
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I);
+ }
+
// Clear out loops state information for the next iteration
CurLoop = 0;
Preheader = 0;
- LoopToAliasMap[L] = CurAST;
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
return Changed;
}
@@ -308,7 +300,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
- // We are processing blocks in reverse dfo, so process children first...
+ // We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
SinkRegion(Children[i]);
@@ -319,6 +311,17 @@ void LICM::SinkRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
+
+ // If the instruction is dead, we would try to sink it because it isn't used
+ // in the loop, instead, just delete it.
+ if (isInstructionTriviallyDead(&I)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
// Check to see if we can sink this instruction to the exit blocks
// of the loop. We can do this if the all users of the instruction are
@@ -350,6 +353,18 @@ void LICM::HoistRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
Instruction &I = *II++;
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
@@ -357,7 +372,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
isSafeToExecuteUnconditionally(I))
hoist(I);
- }
+ }
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
@@ -457,10 +472,10 @@ bool LICM::isLoopInvariantInst(Instruction &I) {
/// position, and may either delete it or move it to outside of the loop.
///
void LICM::sink(Instruction &I) {
- DEBUG(dbgs() << "LICM sinking instruction: " << I);
+ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -477,122 +492,101 @@ void LICM::sink(Instruction &I) {
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
- I.removeFromParent();
- BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
- ExitBlocks[0]->getInstList().insert(InsertPt, &I);
+ I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
}
- } else if (ExitBlocks.empty()) {
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
// The instruction is actually dead if there ARE NO exit blocks.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
- } else {
- // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
- // do all of the hard work of inserting PHI nodes as necessary. We convert
- // the value into a stack object to get it to do this.
-
- // Firstly, we create a stack object to hold the value...
- AllocaInst *AI = 0;
-
- if (!I.getType()->isVoidTy()) {
- AI = new AllocaInst(I.getType(), 0, I.getName(),
- I.getParent()->getParent()->getEntryBlock().begin());
- CurAST->add(AI);
- }
-
- // Secondly, insert load instructions for each use of the instruction
- // outside of the loop.
- while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.use_back());
-
- // If the user is a PHI Node, we actually have to insert load instructions
- // in all predecessor blocks, not in the PHI block itself!
- if (PHINode *UPN = dyn_cast<PHINode>(U)) {
- // Only insert into each predecessor once, so that we don't have
- // different incoming values from the same block!
- std::map<BasicBlock*, Value*> InsertedBlocks;
- for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i)
- if (UPN->getIncomingValue(i) == &I) {
- BasicBlock *Pred = UPN->getIncomingBlock(i);
- Value *&PredVal = InsertedBlocks[Pred];
- if (!PredVal) {
- // Insert a new load instruction right before the terminator in
- // the predecessor block.
- PredVal = new LoadInst(AI, "", Pred->getTerminator());
- CurAST->add(cast<LoadInst>(PredVal));
- }
-
- UPN->setIncomingValue(i, PredVal);
- }
-
- } else {
- LoadInst *L = new LoadInst(AI, "", U);
- U->replaceUsesOfWith(&I, L);
- CurAST->add(L);
- }
- }
-
- // Thirdly, insert a copy of the instruction in each exit block of the loop
- // that is dominated by the instruction, storing the result into the memory
- // location. Be careful not to insert the instruction into any particular
- // basic block more than once.
- std::set<BasicBlock*> InsertedBlocks;
- BasicBlock *InstOrigBB = I.getParent();
-
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
-
- if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) {
- // If we haven't already processed this exit block, do so now.
- if (InsertedBlocks.insert(ExitBlock).second) {
- // Insert the code after the last PHI node...
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-
- // If this is the first exit block processed, just move the original
- // instruction, otherwise clone the original instruction and insert
- // the copy.
- Instruction *New;
- if (InsertedBlocks.size() == 1) {
- I.removeFromParent();
- ExitBlock->getInstList().insert(InsertPt, &I);
- New = &I;
- } else {
- New = I.clone();
- CurAST->copyValue(&I, New);
- if (!I.getName().empty())
- New->setName(I.getName()+".le");
- ExitBlock->getInstList().insert(InsertPt, New);
- }
-
- // Now that we have inserted the instruction, store it into the alloca
- if (AI) new StoreInst(New, AI, InsertPt);
- }
- }
- }
-
- // If the instruction doesn't dominate any exit blocks, it must be dead.
- if (InsertedBlocks.empty()) {
- CurAST->deleteValue(&I);
- I.eraseFromParent();
- }
-
- // Finally, promote the fine value to SSA form.
- if (AI) {
- std::vector<AllocaInst*> Allocas;
- Allocas.push_back(AI);
- PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
}
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
}
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
}
/// hoist - When an instruction is found to only use loop invariant operands
@@ -602,12 +596,8 @@ void LICM::hoist(Instruction &I) {
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
<< I << "\n");
- // Remove the instruction from its current basic block... but don't delete the
- // instruction.
- I.removeFromParent();
-
- // Insert the new node in Preheader, before the terminator.
- Preheader->getInstList().insert(Preheader->getTerminator(), &I);
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -647,223 +637,269 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
return true;
}
-
-/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop. We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
-/// which are loop invariant. We promote these memory locations to use allocas
-/// instead. These allocas can easily be raised to register values by the
-/// PromoteMem2Reg functionality.
+/// which are loop invariant.
///
-void LICM::PromoteValuesInLoop() {
- // PromotedValues - List of values that are promoted out of the loop. Each
- // value has an alloca instruction for it, and a canonical version of the
- // pointer.
- std::vector<std::pair<AllocaInst*, Value*> > PromotedValues;
- std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca
-
- FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap);
- if (ValueToAllocaMap.empty()) return; // If there are values to promote.
-
- Changed = true;
- NumPromoted += PromotedValues.size();
-
- std::vector<Value*> PointerValueNumbers;
-
- // Emit a copy from the value into the alloca'd value in the loop preheader
- TerminatorInst *LoopPredInst = Preheader->getTerminator();
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- Value *Ptr = PromotedValues[i].second;
-
- // If we are promoting a pointer value, update alias information for the
- // inserted load.
- Value *LoadValue = 0;
- if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) {
- // Locate a load or store through the pointer, and assign the same value
- // to LI as we are loading or storing. Since we know that the value is
- // stored in this loop, this will always succeed.
- for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- LoadValue = LI;
- break;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(1) == Ptr) {
- LoadValue = SI->getOperand(0);
- break;
- }
- }
- }
- assert(LoadValue && "No store through the pointer found!");
- PointerValueNumbers.push_back(LoadValue); // Remember this for later.
- }
-
- // Load from the memory we are promoting.
- LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst);
-
- if (LoadValue) CurAST->copyValue(LoadValue, LI);
-
- // Store into the temporary alloca.
- new StoreInst(LI, PromotedValues[i].first, LoopPredInst);
- }
+void LICM::PromoteAliasSet(AliasSet &AS) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
- // Scan the basic blocks in the loop, replacing uses of our pointers with
- // uses of the allocas in question.
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
//
- for (Loop::block_iterator I = CurLoop->block_begin(),
- E = CurLoop->block_end(); I != E; ++I) {
- BasicBlock *BB = *I;
- // Rewrite all loads and stores in the block of the pointer...
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(L->getOperand(0));
- if (I != ValueToAllocaMap.end())
- L->setOperand(0, I->second); // Rewrite load instruction...
- } else if (StoreInst *S = dyn_cast<StoreInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(S->getOperand(1));
- if (I != ValueToAllocaMap.end())
- S->setOperand(1, I->second); // Rewrite store instruction...
- }
- }
- }
-
- // Now that the body of the loop uses the allocas instead of the original
- // memory locations, insert code to copy the alloca value back into the
- // original memory location on all exits from the loop. Note that we only
- // want to insert one copy of the code in each exit block, though the loop may
- // exit to the same block more than once.
+ // for () { if (c) *P += 1; }
//
- SmallPtrSet<BasicBlock*, 16> ProcessedBlocks;
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- if (!ProcessedBlocks.insert(ExitBlocks[i]))
- continue;
-
- // Copy all of the allocas into their memory locations.
- BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI();
- Instruction *InsertPos = BI;
- unsigned PVN = 0;
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- // Load from the alloca.
- LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
-
- // If this is a pointer type, update alias info appropriately.
- if (LI->getType()->isPointerTy())
- CurAST->copyValue(PointerValueNumbers[PVN++], LI);
-
- // Store into the memory we promoted.
- new StoreInst(LI, PromotedValues[i].second, InsertPos);
- }
- }
-
- // Now that we have done the deed, use the mem2reg functionality to promote
- // all of the new allocas we just created into real SSA registers.
+ // into:
//
- std::vector<AllocaInst*> PromotedAllocas;
- PromotedAllocas.reserve(PromotedValues.size());
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
- PromotedAllocas.push_back(PromotedValues[i].first);
- PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
-}
-
-/// FindPromotableValuesInLoop - Check the current loop for stores to definite
-/// pointers, which are not loaded and stored through may aliases and are safe
-/// for promotion. If these are found, create an alloca for the value, add it
-/// to the PromotedValues list, and keep track of the mapping from value to
-/// alloca.
-void LICM::FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
- Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
-
- // Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
- // We can promote this alias set if it has a store, if it is a "Must" alias
- // set, if the pointer is loop invariant, and if we are not eliminating any
- // volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
- continue;
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
- assert(!AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
- Value *V = AS.begin()->getValue();
-
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
- {
- bool PointerOk = true;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- if (V->getType() != I->getValue()->getType()) {
- PointerOk = false;
- break;
- }
- if (!PointerOk)
- continue;
- }
-
- // It isn't safe to promote a load/store from the loop if the load/store is
- // conditional. For example, turning:
- //
- // for () { if (c) *P += 1; }
- //
- // into:
- //
- // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
- //
- // is not safe, because *P may only be valid to access if 'c' is true.
- //
- // It is safe to promote P if all uses are direct load/stores and if at
- // least one is guaranteed to be executed.
- bool GuaranteedToExecute = false;
- bool InvalidInst = false;
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
UI != UE; ++UI) {
- // Ignore instructions not in this loop.
+ // Ignore instructions that are outside the loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
if (!Use || !CurLoop->contains(Use))
continue;
-
- if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
- InvalidInst = true;
- break;
- }
+
+ // If there is an non-load/store instruction in the loop, we can't promote
+ // it.
+ if (isa<LoadInst>(Use))
+ assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+ else if (isa<StoreInst>(Use)) {
+ assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ if (Use->getOperand(0) == ASIV) return;
+ } else
+ return; // Not a load or store.
if (!GuaranteedToExecute)
GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+ LoopUses.push_back(Use);
}
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+ // Otherwise, this is safe to promote, lets do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
- // If there is an non-load/store instruction in the loop, we can't promote
- // it. If there isn't a guaranteed-to-execute instruction, we can't
- // promote.
- if (InvalidInst || !GuaranteedToExecute)
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ // It wants to know some value of the same type as what we'll be inserting.
+ Value *SomeValue;
+ if (isa<LoadInst>(LoopUses[0]))
+ SomeValue = LoopUses[0];
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
+ SSA.Initialize(SomeValue->getType(), SomeValue->getName());
+
+ // First step: bucket up uses of the pointers by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the loop with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
+ Instruction *User = LoopUses[LoopUse];
+ std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (isa<StoreInst>(User)) {
+ SSA.AddAvailableValue(User->getParent(),
+ cast<StoreInst>(User)->getOperand(0));
+ } else {
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ }
+ BlockUses.clear();
continue;
+ }
- const Type *Ty = cast<PointerType>(V->getType())->getElementType();
- AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
- PromotedValues.push_back(std::make_pair(AI, V));
+ // Otherwise, check to see if this block is all loads. If so, we can queue
+ // them all as live in loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
- // Update the AST and alias analysis.
- CurAST->copyValue(V, AI);
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live in value. The last store defines
+ // the live out value. We handle this by doing a linear scan of the block.
+ BasicBlock *BB = User->getParent();
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(L->getOperand(0))) continue;
+
+ // If we haven't seen a store yet, this is a live in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(S->getOperand(1))) continue;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
+ // Remember that this is the active value in the block.
+ StoredValue = S->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Now that all the intra-loop values are classified, set up the preheader.
+ // It gets a load of the pointer we're promoting, and it is the live-out value
+ // from the preheader.
+ LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Now that the preheader is good to go, set up the exit blocks. Each exit
+ // block gets a store of the live-out values that feed them. Since we've
+ // already told the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
- DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n");
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ ALoad->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(ALoad, NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // If the preheader load is itself a pointer, we need to tell alias analysis
+ // about the new pointer we created in the preheader block and about any PHI
+ // nodes that just got inserted.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ CurAST->copyValue(SomeValue, PreheaderLoad);
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(SomeValue, NewPHIs[i]);
}
+
+ // Now that everything is rewritten, delete the old instructions from the body
+ // of the loop. They should all be dead now.
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdate data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediately loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ User->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(User, NewVal);
+ }
+
+ CurAST->deleteValue(User);
+ User->eraseFromParent();
+ }
+
+ // fwew, we're done!
}
+
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
@@ -873,7 +909,7 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
/// set.
void LICM::deleteAnalysisValue(Value *V, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index e4894e99b68f..543dfc1cba09 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -28,7 +28,7 @@ namespace {
class LoopDeletion : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopDeletion() : LoopPass(&ID) {}
+ LoopDeletion() : LoopPass(ID) {}
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -38,9 +38,9 @@ namespace {
bool &Changed, BasicBlock *Preheader);
virtual void getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -55,7 +55,8 @@ namespace {
}
char LoopDeletion::ID = 0;
-static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops");
+INITIALIZE_PASS(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false);
Pass* llvm::createLoopDeletionPass() {
return new LoopDeletion();
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 31058e5759a4..a4336743a8f0 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -74,7 +74,7 @@ namespace {
class LoopIndexSplit : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopIndexSplit() : LoopPass(&ID) {}
+ LoopIndexSplit() : LoopPass(ID) {}
// Index split Loop L. Return true if loop is split.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -197,8 +197,8 @@ namespace {
}
char LoopIndexSplit::ID = 0;
-static RegisterPass<LoopIndexSplit>
-X("loop-index-split", "Index Split Loops");
+INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
+ "Index Split Loops", false, false);
Pass *llvm::createLoopIndexSplitPass() {
return new LoopIndexSplit();
@@ -677,7 +677,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
PI != PE; ++PI) {
BasicBlock *P = *PI;
- if (P == DeadBB || DT->dominates(DeadBB, P))
+ if (DT->dominates(DeadBB, P))
PredBlocks.push_back(P);
}
@@ -799,7 +799,7 @@ void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
// the dominance frontiers.
for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
I != E; ++I) {
- if (*I == CondBB || !DT->dominates(CondBB, *I)) continue;
+ if (!DT->properlyDominates(CondBB, *I)) continue;
DominanceFrontier::iterator BBDF = DF->find(*I);
DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
@@ -1183,7 +1183,7 @@ bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
bool usedOutsideBB = false;
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
- Instruction *U = cast<Instruction>(UI);
+ Instruction *U = cast<Instruction>(*UI);
if (U->getParent() != BB)
usedOutsideBB = true;
}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 16c4a15d3550..65acc1d9257a 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -35,7 +35,7 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(&ID) {}
+ LoopRotate() : LoopPass(ID) {}
// Rotate Loop L as many times as possible. Return true if
// loop is rotated at least once.
@@ -43,15 +43,15 @@ namespace {
// LCSSA form makes instruction renaming easier.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
// Helper functions
@@ -79,7 +79,7 @@ namespace {
}
char LoopRotate::ID = 0;
-static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
+INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
@@ -221,7 +221,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// The value now exits in two versions: the initial value in the preheader
// and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal);
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
@@ -261,6 +261,26 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
+ // Move the original header to the bottom of the loop, where it now more
+ // naturally belongs. This isn't necessary for correctness, and CodeGen can
+ // usually reorder blocks on its own to fix things like this up, but it's
+ // still nice to keep the IR readable.
+ //
+ // The original header should have only one predecessor at this point, since
+ // we checked that the loop had a proper preheader and unique backedge before
+ // we started.
+ assert(OrigHeader->getSinglePredecessor() &&
+ "Original loop header has too many predecessors after loop rotation!");
+ OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
+
+ // Also, since this original header only has one predecessor, zap its
+ // PHI nodes, which are now trivial.
+ FoldSingleEntryPHINodes(OrigHeader);
+
+ // TODO: We could just go ahead and merge OrigHeader into its predecessor
+ // at this point, if we don't mind updating dominator info.
+
+ // Establish a new preheader, update dominators, etc.
preserveCanonicalLoopForm(LPM);
++NumRotated;
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1f9b4156b9cd..e8dc5d3a640e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -161,9 +161,10 @@ RegUseTracker::DropUse(size_t LUIdx) {
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
- if (!RegUsesMap.count(Reg)) return false;
- const SmallBitVector &UsedByIndices =
- RegUsesMap.find(Reg)->second.UsedByIndices;
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
@@ -441,12 +442,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
- const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
- IgnoreSignificantBits);
- if (!Start) return 0;
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
return 0;
@@ -505,12 +506,14 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -528,12 +531,14 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -965,6 +970,12 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// WidestFixupType - This records the widest use type for any fixup using
+ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
+ /// max fixup widths to be equivalent, because the narrower one may be relying
+ /// on the implicit truncation to truncate away bogus bits.
+ const Type *WidestFixupType;
+
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
/// used to formulate a replacement for OperandValToReplace in UserInst.
@@ -976,15 +987,14 @@ public:
LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true) {}
+ AllFixupsOutsideLoop(true),
+ WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
- void check() const;
-
void print(raw_ostream &OS) const;
void dump() const;
};
@@ -1076,13 +1086,16 @@ void LSRUse::print(raw_ostream &OS) const {
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << ',';
}
OS << '}';
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
+
+ if (WidestFixupType)
+ OS << ", widest fixup type: " << *WidestFixupType;
}
void LSRUse::dump() const {
@@ -1354,6 +1367,10 @@ public:
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1587,7 +1604,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// Add one to the backedge-taken count to get the trip count.
- const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Check for a max calculation that matches the pattern. There's no check
@@ -1919,32 +1936,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) {
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
- // Search all uses for the formula. This could be more clever. Ignore
- // ICmpZero uses because they may contain formulae generated by
- // GenerateICmpZeroScales, in which case adding fixup offsets may
- // be invalid.
+ // Search all uses for the formula. This could be more clever.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
if (&LU != &OrigLU &&
LU.Kind != LSRUse::ICmpZero &&
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
- LU.Kind) {
+ F.AM.Scale == OrigF.AM.Scale) {
if (F.AM.BaseOffs == 0)
return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+ // can skip the rest of the formulae and procede to the next LSRUse.
break;
}
}
}
}
+ // Nothing looked good.
return 0;
}
@@ -1976,7 +2002,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- next(I); NewStrideIter != E; ++NewStrideIter) {
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
@@ -2066,6 +2092,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
@@ -2195,6 +2225,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
@@ -2207,14 +2241,13 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
- SmallVectorImpl<const SCEV *> &UninterestingOps,
const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
+ CollectSubexprs(*I, C, Ops, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
@@ -2222,8 +2255,8 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
- C, Ops, UninterestingOps, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
+ C, Ops, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2233,17 +2266,13 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, UninterestingOps, L, SE);
+ Ops, L, SE);
return;
}
}
- // Otherwise use the value itself. Loop-variant "unknown" values are
- // uninteresting; we won't be able to do anything meaningful with them.
- if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
- UninterestingOps.push_back(S);
- else
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ // Otherwise use the value itself, optionally with a scale applied.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2257,19 +2286,19 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
- SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
- CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
-
- // Add any uninteresting values as one register, as we won't be able to
- // form any interesting reassociation opportunities with them. They'll
- // just have to be added inside the loop no matter what we do.
- if (!UninterestingAddOps.empty())
- AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, L, SE);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ continue;
+
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
@@ -2279,9 +2308,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps
- ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append
- (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -2377,7 +2406,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
// Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
std::swap(F.BaseRegs[i], F.BaseRegs.back());
@@ -2778,6 +2807,10 @@ LSRInstance::GenerateAllReuseFormulae() {
}
GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
}
/// If their are multiple formulae with the same set of registers used
@@ -2876,11 +2909,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
return Power;
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
-void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not necessarily hurt register pressure); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2938,7 +2971,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2988,7 +3026,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.AM.BaseOffs;
- DEBUG(errs() << "New fixup has offset "
+ DEBUG(dbgs() << "New fixup has offset "
<< Fixup.Offset << '\n');
}
if (Fixup.LUIdx == NumUses-1)
@@ -3009,7 +3047,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
SmallPtrSet<const SCEV *, 4> Taken;
@@ -3071,6 +3132,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
}
}
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
/// SolveRecurse - This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
@@ -3614,10 +3686,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// to formulate the values needed for the uses.
GenerateAllReuseFormulae();
- DEBUG(dbgs() << "\n"
- "After generating reuse formulae:\n";
- print_uses(dbgs()));
-
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
@@ -3724,15 +3792,15 @@ private:
}
char LoopStrengthReduce::ID = 0;
-static RegisterPass<LoopStrengthReduce>
-X("loop-reduce", "Loop Strength Reduction");
+INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false);
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(&ID), TLI(tli) {}
+ : LoopPass(ID), TLI(tli) {}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 4ad41ae4b59f..d0edfa220051 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -26,7 +27,7 @@
using namespace llvm;
static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
static cl::opt<unsigned>
@@ -42,7 +43,7 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
+ LoopUnroll() : LoopPass(ID) {}
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
@@ -55,23 +56,24 @@ namespace {
/// loop preheaders be inserted into the CFG...
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopInfo>();
AU.addPreservedID(LCSSAID);
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ScalarEvolution>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
};
}
char LoopUnroll::ID = 0;
-static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
+INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
@@ -145,12 +147,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
// FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
return true;
}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 0c900ffc4027..9afe428ba569 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -77,7 +77,6 @@ namespace {
bool redoLoop;
Loop *currentLoop;
- DominanceFrontier *DF;
DominatorTree *DT;
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
@@ -92,15 +91,15 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
- LoopPass(&ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
loopPreheader(NULL) {}
bool runOnLoop(Loop *L, LPPassManager &LPM);
bool processCurrentLoop();
/// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
+ /// loop preheaders be inserted into the CFG.
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
@@ -110,7 +109,6 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
private:
@@ -160,7 +158,7 @@ namespace {
};
}
char LoopUnswitch::ID = 0;
-static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
+INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
@@ -201,7 +199,6 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;
- DF = getAnalysisIfAvailable<DominanceFrontier>();
DT = getAnalysisIfAvailable<DominatorTree>();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
@@ -216,8 +213,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
if (DT)
DT->runOnFunction(*F);
- if (DF)
- DF->runOnFunction(*F);
}
return Changed;
}
@@ -282,19 +277,18 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
-/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
-/// 1. Exit the loop with no side effects.
-/// 2. Branch to the latch block with no side-effects.
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
///
-/// If these conditions are true, we return true and set ExitBB to the block we
+/// If true, we return true and set ExitBB to the block we
/// exit through.
///
static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
BasicBlock *&ExitBB,
std::set<BasicBlock*> &Visited) {
if (!Visited.insert(BB).second) {
- // Already visited and Ok, end of recursion.
- return true;
+ // Already visited. Without more analysis, this could indicate an infinte loop.
+ return false;
} else if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
@@ -324,7 +318,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
/// process. If so, return the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
- Visited.insert(L->getHeader()); // Branches to header are ok.
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
BasicBlock *ExitBB = 0;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
@@ -356,8 +350,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
if (!BI->isConditional() || BI->getCondition() != Cond)
return false;
- // Check to see if a successor of the branch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
new file mode 100644
index 000000000000..973ffe7e6a40
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,161 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+bool LowerAtomicIntrinsic(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+
+ Function *Callee = CI->getCalledFunction();
+ if (!Callee)
+ return false;
+
+ unsigned IID = Callee->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::memory_barrier:
+ break;
+
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Delta = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
+ switch (IID) {
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ }
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Builder.CreateStore(Val, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_cmp_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Cmp = CI->getArgOperand(1);
+ Value *Val = CI->getArgOperand(2);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+
+ return true;
+}
+
+struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {}
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++;
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ Changed |= LowerAtomicIntrinsic(CI);
+ }
+ return Changed;
+ }
+
+};
+
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form",
+ false, false);
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0e566c5bd9be..24fae423d2f7 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -304,7 +304,7 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass(&ID) {}
+ MemCpyOpt() : FunctionPass(ID) {}
private:
// This transformation requires dominator postdominator info
@@ -331,8 +331,7 @@ namespace {
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
-static RegisterPass<MemCpyOpt> X("memcpyopt",
- "MemCpy Optimization");
+INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
@@ -374,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// If the call is readnone, ignore it, otherwise bail out. We don't even
// allow readonly here because we don't want something like:
// A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
- if (AA.getModRefBehavior(CallSite::get(BI)) ==
+ if (AA.getModRefBehavior(CallSite(BI)) ==
AliasAnalysis::DoesNotAccessMemory)
continue;
@@ -509,7 +508,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// because we'll need to do type comparisons based on the underlying type.
Value *cpyDest = cpy->getDest();
Value *cpySrc = cpy->getSource();
- CallSite CS = CallSite::get(C);
+ CallSite CS(C);
// We need to be able to reason about the size of the memcpy, so we require
// that it be a constant.
@@ -637,10 +636,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
return true;
}
-/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
-/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
-/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
-/// This allows later passes to remove the first memcpy altogether.
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
@@ -744,7 +744,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
const Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
+ ArgTys, 3));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 98452f5d82c4..b8afcc12d927 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -77,7 +77,7 @@ namespace {
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
- Reassociate() : FunctionPass(&ID) {}
+ Reassociate() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -103,7 +103,8 @@ namespace {
}
char Reassociate::ID = 0;
-static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions");
+INITIALIZE_PASS(Reassociate, "reassociate",
+ "Reassociate expressions", false, false);
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 13222ac22004..506b72ac34e0 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -36,7 +36,7 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
namespace {
struct RegToMem : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- RegToMem() : FunctionPass(&ID) {}
+ RegToMem() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,8 +59,8 @@ namespace {
}
char RegToMem::ID = 0;
-static RegisterPass<RegToMem>
-X("reg2mem", "Demote all values to stack slots");
+INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false);
bool RegToMem::runOnFunction(Function &F) {
@@ -124,7 +124,7 @@ bool RegToMem::runOnFunction(Function &F) {
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
//
-const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
return new RegToMem();
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 907ece8fcce9..6115c05c20ac 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -275,12 +275,12 @@ public:
return I->second;
}
- LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
StructValueState.find(std::make_pair(V, i));
assert(I != StructValueState.end() && "V is not in valuemap!");
return I->second;
- }
+ }*/
/// getTrackedRetVals - Get the inferred return value map.
///
@@ -508,17 +508,16 @@ private:
void visitLoadInst (LoadInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitCallInst (CallInst &I) {
- visitCallSite(CallSite::get(&I));
+ visitCallSite(&I);
}
void visitInvokeInst (InvokeInst &II) {
- visitCallSite(CallSite::get(&II));
+ visitCallSite(&II);
visitTerminatorInst(II);
}
void visitCallSite (CallSite CS);
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
- void visitVANextInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
void visitInstruction(Instruction &I) {
@@ -1586,7 +1585,7 @@ namespace {
///
struct SCCP : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- SCCP() : FunctionPass(&ID) {}
+ SCCP() : FunctionPass(ID) {}
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
@@ -1600,8 +1599,8 @@ namespace {
} // end anonymous namespace
char SCCP::ID = 0;
-static RegisterPass<SCCP>
-X("sccp", "Sparse Conditional Constant Propagation");
+INITIALIZE_PASS(SCCP, "sccp",
+ "Sparse Conditional Constant Propagation", false, false);
// createSCCPPass - This is the public interface to this file.
FunctionPass *llvm::createSCCPPass() {
@@ -1702,14 +1701,15 @@ namespace {
///
struct IPSCCP : public ModulePass {
static char ID;
- IPSCCP() : ModulePass(&ID) {}
+ IPSCCP() : ModulePass(ID) {}
bool runOnModule(Module &M);
};
} // end anonymous namespace
char IPSCCP::ID = 0;
-static RegisterPass<IPSCCP>
-Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation");
+INITIALIZE_PASS(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false);
// createIPSCCPPass - This is the public interface to this file.
ModulePass *llvm::createIPSCCPPass() {
@@ -1748,6 +1748,13 @@ static bool AddressIsTaken(const GlobalValue *GV) {
bool IPSCCP::runOnModule(Module &M) {
SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+ // address-taken-ness. Because of this, we keep track of their addresses from
+ // the first pass so we can use them for the later simplification pass.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
@@ -1763,9 +1770,13 @@ bool IPSCCP::runOnModule(Module &M) {
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
- if (F->hasLocalLinkage() && !AddressIsTaken(F)) {
- Solver.AddArgumentTrackedFunction(F);
- continue;
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
}
// Assume the function is called.
@@ -1950,7 +1961,7 @@ bool IPSCCP::runOnModule(Module &M) {
continue;
// We can only do this if we know that nothing else can call the function.
- if (!F->hasLocalLinkage() || AddressIsTaken(F))
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
continue;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index dd445f63320a..fee317dbd9ab 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -28,6 +28,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
@@ -51,7 +52,7 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- explicit SROA(signed T = -1) : FunctionPass(&ID) {
+ explicit SROA(signed T = -1) : FunctionPass(ID) {
if (T == -1)
SRThreshold = 128;
else
@@ -114,8 +115,7 @@ namespace {
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
- AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
-
+
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -135,7 +135,8 @@ namespace {
}
char SROA::ID = 0;
-static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
+INITIALIZE_PASS(SROA, "scalarrepl",
+ "Scalar Replacement of Aggregates", false, false);
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
@@ -193,6 +194,27 @@ private:
};
} // end anonymous namespace.
+
+/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
+/// allowed to form. We do this to avoid MMX types, which is a complete hack,
+/// but is required until the backend is fixed.
+static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
+ StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
+ if (!Triple.startswith("i386") &&
+ !Triple.startswith("x86_64"))
+ return false;
+
+ // Reject all the MMX vector types.
+ switch (VTy->getNumElements()) {
+ default: return false;
+ case 1: return VTy->getElementType()->isIntegerTy(64);
+ case 2: return VTy->getElementType()->isIntegerTy(32);
+ case 4: return VTy->getElementType()->isIntegerTy(16);
+ case 8: return VTy->getElementType()->isIntegerTy(8);
+ }
+}
+
+
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
@@ -209,7 +231,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
+ !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
@@ -969,7 +992,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length)
isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == CallInst::ArgOffset, Info);
+ UI.getOperandNo() == 0, Info);
else
MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1662,6 +1685,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ return HasPadding(ATy->getElementType(), TD);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return HasPadding(VTy->getElementType(), TD);
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
@@ -1691,12 +1720,8 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
if (PrevFieldEnd < SL->getSizeInBits())
return true;
}
-
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- return HasPadding(ATy->getElementType(), TD);
- } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- return HasPadding(VTy->getElementType(), TD);
}
+
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
@@ -1787,7 +1812,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != CallInst::ArgOffset) return false;
+ if (UI.getOperandNo() != 0) return false;
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 49d93a2fcc27..360749caf111 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,14 +42,15 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(&ID) {}
+ CFGSimplifyPass() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
};
}
char CFGSimplifyPass::ID = 0;
-static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG");
+INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
+ "Simplify the CFG", false, false);
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
@@ -284,10 +285,9 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
while (LocalChange) {
LocalChange = false;
- // Loop over all of the basic blocks (except the first one) and remove them
- // if they are unneeded...
+ // Loop over all of the basic blocks and remove them if they are unneeded...
//
- for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (SimplifyCFG(BBIt++, TD)) {
LocalChange = true;
++NumSimpl;
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index c3408e77807f..3ec70ec2e024 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -32,7 +32,7 @@ namespace {
const TargetData *TD;
public:
static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -46,8 +46,8 @@ namespace {
char SimplifyHalfPowrLibCalls::ID = 0;
} // end anonymous namespace.
-static RegisterPass<SimplifyHalfPowrLibCalls>
-X("simplify-libcalls-halfpowr", "Simplify half_powr library calls");
+INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
+ "Simplify half_powr library calls", false, false);
// Public interface to the Simplify HalfPowr LibCalls pass.
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index b1c619125c35..d7ce53f36715 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -532,7 +532,7 @@ struct StrStrOpt : public LibCallOptimization {
StrLen, B, TD);
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
- ICmpInst *Old = cast<ICmpInst>(UI++);
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
ConstantInt::getNullValue(StrNCmp->getType()),
"cmp");
@@ -566,8 +566,8 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD),
- CI->getType());
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
+ ToFindStr[0], B, TD), CI->getType());
return 0;
}
};
@@ -681,8 +681,8 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context),
- false);
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt8Ty(*Context), false);
EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
return CI->getArgOperand(0);
}
@@ -1042,9 +1042,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte.
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()+1), 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the
+ ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
+ FormatStr.size() + 1), 1, false, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1080,7 +1080,8 @@ struct SPrintFOpt : public LibCallOptimization {
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
+ IncLen, 1, false, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1236,7 +1237,7 @@ namespace {
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {}
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {}
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1253,8 +1254,8 @@ namespace {
char SimplifyLibCalls::ID = 0;
} // end anonymous namespace.
-static RegisterPass<SimplifyLibCalls>
-X("simplify-libcalls", "Simplify well-known library calls");
+INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false);
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -2155,7 +2156,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * pow(pow(x,y),z)-> pow(x,y*z)
//
// puts:
-// * puts("") -> putchar("\n")
+// * puts("") -> putchar('\n')
//
// round, roundf, roundl:
// * round(cnst) -> cnst'
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index b88ba4850509..95d3dedfb62d 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -35,7 +35,7 @@ namespace {
public:
static char ID; // Pass identification
- Sinking() : FunctionPass(&ID) {}
+ Sinking() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -56,8 +56,7 @@ namespace {
} // end anonymous namespace
char Sinking::ID = 0;
-static RegisterPass<Sinking>
-X("sink", "Code sinking");
+INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 9208238f4ba5..2e437ac778c8 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -49,7 +49,7 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(&ID) {}
+ TailDup() : FunctionPass(ID) {}
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +59,7 @@ namespace {
}
char TailDup::ID = 0;
-static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication");
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
// Public interface to the Tail Duplication pass
FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 01c8e5d6fcf4..371725467a24 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -72,7 +72,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- TailCallElim() : FunctionPass(&ID) {}
+ TailCallElim() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -87,7 +87,8 @@ namespace {
}
char TailCallElim::ID = 0;
-static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination");
+INITIALIZE_PASS(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false);
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
@@ -277,22 +278,22 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = 0;
- for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
- if (RI != IgnoreRI) {
- Value *RetOp = RI->getOperand(0);
-
- // We can only perform this transformation if the value returned is
- // evaluatable at the start of the initial invocation of the function,
- // instead of at the end of the evaluation.
- //
- if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
-
- if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
- ReturnedValue = RetOp;
- }
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
return ReturnedValue;
}
@@ -306,7 +307,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
- // Exactly one operand should be the result of the call instruction...
+ // Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
return 0;
@@ -386,21 +387,22 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
- if (!CanMoveAboveCall(BBI, CI)) {
- // If we can't move the instruction above the call, it might be because it
- // is an associative and commutative operation that could be tranformed
- // using accumulator recursion elimination. Check to see if this is the
- // case, and if so, remember the initial accumulator value for later.
- if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(BBI, CI))) {
- // Yes, this is accumulator recursion. Remember which instruction
- // accumulates.
- AccumulatorRecursionInstr = BBI;
- } else {
- return false; // Otherwise, we cannot eliminate the tail recursion!
- }
+ for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+ // is an associative and commutative operation that could be tranformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
}
+ }
// We can only transform call/return pairs that either ignore the return value
// of the call and return void, ignore the value of the call and return a
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index ec625b4cbb28..093083a630cf 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -97,23 +97,13 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
- pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- // Can't merge the entry block. Don't merge away blocks who have their
- // address taken: this is a bug if the predecessor block is the entry node
- // (because we'd end up taking the address of the entry) and undesirable in
- // any case.
- if (pred_begin(BB) == pred_end(BB) ||
- BB->hasAddressTaken()) return false;
+ // Don't merge away blocks who have their address taken.
+ if (BB->hasAddressTaken()) return false;
- BasicBlock *PredBB = *PI++;
- for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
- if (*PI != PredBB) {
- PredBB = 0; // There are multiple different predecessors...
- break;
- }
-
- // Can't merge if there are multiple predecessors.
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
if (!PredBB) return false;
+
// Don't break self-loops.
if (PredBB == BB) return false;
// Don't break invokes.
@@ -267,7 +257,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
case Instruction::Switch: // Should remove entry
default:
case Instruction::Ret: // Cannot happen, has no successors!
- llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");
+ llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
}
if (NewTI) // If it's a different instruction, replace.
@@ -421,7 +411,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
if (DT)
DT->splitBlock(NewBB);
- if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
+ if (DominanceFrontier *DF =
+ P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
DF->splitBlock(NewBB);
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index f0e31efa30c4..23a30cc58507 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -82,8 +82,8 @@ void BasicInlinerImpl::inlineFunctions() {
Function *F = *FI;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- CallSite CS = CallSite::get(I);
- if (CS.getInstruction() && CS.getCalledFunction()
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()
&& !CS.getCalledFunction()->isDeclaration())
CallSites.push_back(CS);
}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 26f53c05a042..f75ffe6105fa 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -36,7 +36,7 @@ STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
struct BreakCriticalEdges : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BreakCriticalEdges() : FunctionPass(&ID) {}
+ BreakCriticalEdges() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -53,11 +53,11 @@ namespace {
}
char BreakCriticalEdges::ID = 0;
-static RegisterPass<BreakCriticalEdges>
-X("break-crit-edges", "Break critical edges in CFG");
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false);
// Publically exposed interface to pass...
-const PassInfo *const llvm::BreakCriticalEdgesID = &X;
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
}
@@ -225,7 +225,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
UI != E; ) {
Value::use_iterator Use = UI++;
- if (PHINode *PN = dyn_cast<PHINode>(Use)) {
+ if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
// Remove one entry from each PHI.
if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
PN->setOperand(Use.getOperandNo(), NewBB);
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 7a9d007ed558..c3139498c250 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -421,9 +421,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- 1, false, B, TD);
+ if (isFoldable(3, 2, false)) {
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -444,9 +444,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
- EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- 1, false, B, TD);
+ if (isFoldable(3, 2, false)) {
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -462,10 +462,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ if (isFoldable(3, 2, false)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
false);
- EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2),
+ false, B, TD);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -487,7 +488,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) {
+ if (isFoldable(2, 1, true)) {
Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
Name.substr(2, 6));
replaceCall(Ret);
@@ -505,7 +506,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), B, TD, Name.substr(2, 7));
replaceCall(Ret);
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index dec227acafd2..61cbeb2bd35b 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -20,7 +20,6 @@ add_llvm_library(LLVMTransformUtils
Mem2Reg.cpp
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
- SSI.cpp
SimplifyCFG.cpp
UnifyFunctionExitNodes.cpp
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1dcfd5787846..f43186edae43 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -23,7 +23,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Support/CFG.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/SmallVector.h"
@@ -69,10 +69,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
-// ArgMap values.
+// VMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -126,7 +127,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
BE = NewFunc->end(); BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, VMap);
+ RemapInstruction(II, VMap, ModuleLevelChanges);
}
/// CloneFunction - Return a copy of the specified function, but without
@@ -139,6 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
///
Function *llvm::CloneFunction(const Function *F,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo) {
std::vector<const Type*> ArgTypes;
@@ -167,7 +169,7 @@ Function *llvm::CloneFunction(const Function *F,
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo);
+ CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
return NewF;
}
@@ -180,6 +182,7 @@ namespace {
Function *NewFunc;
const Function *OldFunc;
ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
@@ -187,12 +190,14 @@ namespace {
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
+ bool moduleLevelChanges,
SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
- : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ : NewFunc(newFunc), OldFunc(oldFunc),
+ VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+ Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
@@ -313,7 +318,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- VMap)))
+ VMap, ModuleLevelChanges)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -334,25 +339,16 @@ ConstantFoldMappedInstruction(const Instruction *I) {
Ops.size(), TD);
}
-static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
- DILocation ILoc(InsnMD);
- if (!ILoc.Verify()) return InsnMD;
+static DebugLoc
+UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
+ LLVMContext &Ctx) {
+ DebugLoc NewLoc = TheCallDL;
+ if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
+ NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
+ Ctx);
- DILocation CallLoc(TheCallMD);
- if (!CallLoc.Verify()) return InsnMD;
-
- DILocation OrigLocation = ILoc.getOrigLocation();
- MDNode *NewLoc = TheCallMD;
- if (OrigLocation.Verify())
- NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD);
-
- Value *MDVs[] = {
- InsnMD->getOperand(0), // Line
- InsnMD->getOperand(1), // Col
- InsnMD->getOperand(2), // Scope
- NewLoc
- };
- return MDNode::get(InsnMD->getContext(), MDVs, 4);
+ return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
+ InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -364,6 +360,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
@@ -377,8 +374,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
- PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
- NameSuffix, CodeInfo, TD);
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ Returns, NameSuffix, CodeInfo, TD);
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
@@ -408,10 +405,9 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
//
BasicBlock::iterator I = NewBB->begin();
- unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg");
- MDNode *TheCallMD = NULL;
- if (TheCall && TheCall->hasMetadata())
- TheCallMD = TheCall->getMetadata(DbgKind);
+ DebugLoc TheCallDL;
+ if (TheCall)
+ TheCallDL = TheCall->getDebugLoc();
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
@@ -420,15 +416,17 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
BasicBlock::const_iterator OldI = BI->begin();
for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
if (I->hasMetadata()) {
- if (TheCallMD) {
- if (MDNode *IMD = I->getMetadata(DbgKind)) {
- MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
- I->setMetadata(DbgKind, NewMD);
+ if (!TheCallDL.isUnknown()) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (!IDL.isUnknown()) {
+ DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
+ I->getContext());
+ I->setDebugLoc(NewDL);
}
} else {
// The cloned instruction has dbg info but the call instruction
// does not have dbg info. Remove dbg info from cloned instruction.
- I->setMetadata(DbgKind, 0);
+ I->setDebugLoc(DebugLoc());
}
}
PHIToResolve.push_back(cast<PHINode>(OldI));
@@ -444,18 +442,20 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Otherwise, remap the rest of the instructions normally.
for (; I != NewBB->end(); ++I) {
if (I->hasMetadata()) {
- if (TheCallMD) {
- if (MDNode *IMD = I->getMetadata(DbgKind)) {
- MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
- I->setMetadata(DbgKind, NewMD);
+ if (!TheCallDL.isUnknown()) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (!IDL.isUnknown()) {
+ DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
+ I->getContext());
+ I->setDebugLoc(NewDL);
}
} else {
// The cloned instruction has dbg info but the call instruction
// does not have dbg info. Remove dbg info from cloned instruction.
- I->setMetadata(DbgKind, 0);
+ I->setDebugLoc(DebugLoc());
}
}
- RemapInstruction(I, VMap);
+ RemapInstruction(I, VMap, ModuleLevelChanges);
}
}
@@ -477,7 +477,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
if (BasicBlock *MappedBlock =
cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap);
+ VMap, ModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index fc603d23e9ac..b347bf597f8e 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -17,7 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/Constant.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
/// CloneModule - Return an exact copy of the specified module. This is not as
@@ -89,7 +89,8 @@ Module *llvm::CloneModule(const Module *M,
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- VMap)));
+ VMap,
+ true)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -108,7 +109,7 @@ Module *llvm::CloneModule(const Module *M,
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, I, VMap, Returns);
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
}
F->setLinkage(I->getLinkage());
@@ -120,34 +121,17 @@ Module *llvm::CloneModule(const Module *M,
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, VMap)));
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
}
// And named metadata....
for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
E = M->named_metadata_end(); I != E; ++I) {
const NamedMDNode &NMD = *I;
- SmallVector<MDNode*, 4> MDs;
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap)));
- NamedMDNode::Create(New->getContext(), NMD.getName(),
- MDs.data(), MDs.size(), New);
+ NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
}
- // Update metadata attach with instructions.
- for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end();
- FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs;
- BI->getAllMetadata(MDs);
- for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator
- MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) {
- Value *MappedValue = MapValue(MDI->second, VMap);
- if (MDI->second != MappedValue && MappedValue)
- BI->setMetadata(MDI->first, cast<MDNode>(MappedValue));
- }
- }
return New;
}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 598e7d29e378..88979e862df2 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -215,12 +215,12 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
if (I->second->getFunction() == 0)
if (Function *F = CallSite(NewCall).getCalledFunction()) {
// Indirect call site resolved to direct call.
- CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]);
-
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
continue;
}
-
- CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
}
// Update the call graph by deleting the edge from Callee to Caller. We must
@@ -365,7 +365,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i",
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
&InlinedFunctionInfo, IFI.TD, TheCall);
// Remember the first block that is newly cloned over.
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 090af95c4b87..5ca82996b42f 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
namespace {
struct InstNamer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(&ID) {}
+ InstNamer() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
@@ -48,12 +48,12 @@ namespace {
};
char InstNamer::ID = 0;
- static RegisterPass<InstNamer> X("instnamer",
- "Assign names to anonymous instructions");
+ INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false);
}
-const PassInfo *const llvm::InstructionNamerID = &X;
+char &llvm::InstructionNamerID = InstNamer::ID;
//===----------------------------------------------------------------------===//
//
// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index e90c30bba78e..275b26508f99 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -47,7 +47,7 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
namespace {
struct LCSSA : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LCSSA() : LoopPass(&ID) {}
+ LCSSA() : LoopPass(ID) {}
// Cached analysis information for the current function.
DominatorTree *DT;
@@ -64,22 +64,13 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- // LCSSA doesn't actually require LoopSimplify, but the PassManager
- // doesn't know how to schedule LoopSimplify by itself.
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredTransitive<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addRequiredTransitive<DominatorTree>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
-
- // Request DominanceFrontier now, even though LCSSA does
- // not use it. This allows Pass Manager to schedule Dominance
- // Frontier early enough such that one LPPassManager can handle
- // multiple loop transformation passes.
- AU.addRequired<DominanceFrontier>();
AU.addPreserved<DominanceFrontier>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
}
private:
bool ProcessInstruction(Instruction *Inst,
@@ -99,10 +90,10 @@ namespace {
}
char LCSSA::ID = 0;
-static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
+INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false);
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
-const PassInfo *const llvm::LCSSAID = &X;
+char &llvm::LCSSAID = LCSSA::ID;
/// BlockDominatesAnExit - Return true if the specified block dominates at least
@@ -215,7 +206,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
DomTreeNode *DomNode = DT->getNode(DomBB);
SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(Inst);
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8e9113871f47..52f0499f39b0 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -490,6 +490,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
/// rewriting all the predecessors to branch to the successor block and return
/// true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
// We can't eliminate infinite loops.
BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
if (BB == Succ) return false;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 4f4edf3a754c..b3c4801a4f15 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -46,9 +46,9 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
@@ -65,27 +65,30 @@ STATISTIC(NumNested , "Number of nested loops split out");
namespace {
struct LoopSimplify : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : LoopPass(&ID) {}
+ LoopSimplify() : LoopPass(ID) {}
// AA - If we have an alias analysis object to update, this is it, otherwise
// this is null.
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
+ ScalarEvolution *SE;
Loop *L;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// We need loop information to identify the loops...
- AU.addRequiredTransitive<LoopInfo>();
- AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
+
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreservedID(LCSSAID);
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -104,11 +107,11 @@ namespace {
}
char LoopSimplify::ID = 0;
-static RegisterPass<LoopSimplify>
-X("loopsimplify", "Canonicalize natural loops", true);
+INITIALIZE_PASS(LoopSimplify, "loopsimplify",
+ "Canonicalize natural loops", true, false);
// Publically exposed interface to pass...
-const PassInfo *const llvm::LoopSimplifyID = &X;
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
@@ -120,6 +123,7 @@ bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
AA = getAnalysisIfAvailable<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
Changed |= ProcessLoop(L, LPM);
@@ -141,15 +145,16 @@ ReprocessLoop:
BB != E; ++BB) {
if (*BB == L->getHeader()) continue;
- SmallPtrSet<BasicBlock *, 4> BadPreds;
- for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
BasicBlock *P = *PI;
if (!L->contains(P))
BadPreds.insert(P);
}
// Delete each unique out-of-loop (and thus dead) predecessor.
- for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
E = BadPreds.end(); I != E; ++I) {
DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
@@ -530,6 +535,12 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
OuterLoopPreds.size(),
@@ -619,6 +630,11 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
if (P != Preheader) BackedgeBlocks.push_back(P);
}
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index e0e07e7bbc82..236bbe9057bf 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -127,6 +128,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
return false;
}
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
// Find trip multiple if count is not available
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 2696e6913f3b..a46dd8402aca 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -78,14 +78,14 @@ namespace {
static char ID; // Pass identification, replacement for typeid
explicit LowerInvoke(const TargetLowering *tli = NULL,
bool useExpensiveEHSupport = ExpensiveEHSupport)
- : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// This is a cluster of orthogonal Transforms
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
@@ -100,10 +100,11 @@ namespace {
}
char LowerInvoke::ID = 0;
-static RegisterPass<LowerInvoke>
-X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false);
-const PassInfo *const llvm::LowerInvokePassID = &X;
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 468a5fe4c5e5..5530b4700aac 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -29,19 +29,18 @@ using namespace llvm;
namespace {
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
- /// instructions. Note that this cannot be a BasicBlock pass because it
- /// modifies the CFG!
+ /// instructions.
class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LowerSwitch() : FunctionPass(&ID) {}
+ LowerSwitch() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerInvokePassID);
}
@@ -50,8 +49,7 @@ namespace {
Constant* High;
BasicBlock* BB;
- CaseRange() : Low(0), High(0), BB(0) { }
- CaseRange(Constant* low, Constant* high, BasicBlock* bb) :
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
Low(low), High(high), BB(bb) { }
};
@@ -81,11 +79,11 @@ namespace {
}
char LowerSwitch::ID = 0;
-static RegisterPass<LowerSwitch>
-X("lowerswitch", "Lower SwitchInst's to branches");
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false);
// Publically exposed interface to pass...
-const PassInfo *const llvm::LowerSwitchID = &X;
+char &llvm::LowerSwitchID = LowerSwitch::ID;
// createLowerSwitchPass - Interface to this file...
FunctionPass *llvm::createLowerSwitchPass() {
return new LowerSwitch();
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 99203b662120..101645bd92b7 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -27,7 +27,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- PromotePass() : FunctionPass(&ID) {}
+ PromotePass() : FunctionPass(ID) {}
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
@@ -49,7 +49,8 @@ namespace {
} // end of anonymous namespace
char PromotePass::ID = 0;
-static RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register");
+INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false);
bool PromotePass::runOnFunction(Function &F) {
std::vector<AllocaInst*> Allocas;
@@ -81,8 +82,6 @@ bool PromotePass::runOnFunction(Function &F) {
return Changed;
}
-// Publically exposed interface to pass...
-const PassInfo *const llvm::PromoteMemoryToRegisterID = &X;
// createPromoteMemoryToRegister - Provide an entry point to create this pass.
//
FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index c0de1938b2db..a4e3029e3a5a 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -228,14 +228,6 @@ namespace {
void run();
- /// properlyDominates - Return true if I1 properly dominates I2.
- ///
- bool properlyDominates(Instruction *I1, Instruction *I2) const {
- if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
- I1 = II->getNormalDest()->begin();
- return DT.properlyDominates(I1->getParent(), I2->getParent());
- }
-
/// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
///
bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
@@ -896,11 +888,12 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
DIVar, SI);
// Propagate any debug metadata from the store onto the dbg.value.
- if (MDNode *SIMD = SI->getMetadata("dbg"))
- DbgVal->setMetadata("dbg", SIMD);
+ DebugLoc SIDL = SI->getDebugLoc();
+ if (!SIDL.isUnknown())
+ DbgVal->setDebugLoc(SIDL);
// Otherwise propagate debug metadata from dbg.declare.
- else if (MDNode *MD = DDI->getMetadata("dbg"))
- DbgVal->setMetadata("dbg", MD);
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
}
// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index f4bdb527655a..c855988307ea 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -29,20 +29,21 @@ static AvailableValsTy &getAvailableVals(void *AV) {
}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {}
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete &getAvailableVals(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates. ProtoValue is the value used to name PHI nodes.
-void SSAUpdater::Initialize(Value *ProtoValue) {
+/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
if (AV == 0)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
- PrototypeValue = ProtoValue;
+ ProtoType = Ty;
+ ProtoName = Name;
}
/// HasValueForBlock - Return true if the SSAUpdater already has a value for
@@ -54,8 +55,8 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
/// AddAvailableValue - Indicate that a rewritten value is available in the
/// specified block with the specified value.
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
- assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
- assert(PrototypeValue->getType() == V->getType() &&
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
"All rewritten values must have the same type");
getAvailableVals(AV)[BB] = V;
}
@@ -148,7 +149,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If there are no predecessors, just return undef.
if (PredValues.empty())
- return UndefValue::get(PrototypeValue->getType());
+ return UndefValue::get(ProtoType);
// Otherwise, if all the merged values are the same, just use it.
if (SingularValue != 0)
@@ -168,9 +169,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
}
// Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
- PrototypeValue->getName(),
- &BB->front());
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front());
InsertedPHI->reserveOperandSpace(PredValues.size());
// Fill in all the predecessors of the PHI.
@@ -205,6 +204,22 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
+
/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
/// in the SSAUpdaterImpl template.
namespace {
@@ -266,15 +281,14 @@ public:
/// GetUndefVal - Get an undefined value of the same type as the value
/// being handled.
static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
- return UndefValue::get(Updater->PrototypeValue->getType());
+ return UndefValue::get(Updater->ProtoType);
}
/// CreateEmptyPHI - Create a new PHI instruction in the specified block.
/// Reserve space for the operands but do not fill them in yet.
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
SSAUpdater *Updater) {
- PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(),
- Updater->PrototypeValue->getName(),
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName,
&BB->front());
PHI->reserveOperandSpace(NumPreds);
return PHI;
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
deleted file mode 100644
index 4e813ddf95c7..000000000000
--- a/lib/Transforms/Utils/SSI.cpp
+++ /dev/null
@@ -1,432 +0,0 @@
-//===------------------- SSI.cpp - Creates SSI Representation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts a list of variables to the Static Single Information
-// form. This is a program representation described by Scott Ananian in his
-// Master Thesis: "The Static Single Information Form (1999)".
-// We are building an on-demand representation, that is, we do not convert
-// every single variable in the target function to SSI form. Rather, we receive
-// a list of target variables that must be converted. We also do not
-// completely convert a target variable to the SSI format. Instead, we only
-// change the variable in the points where new information can be attached
-// to its live range, that is, at branch points.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ssi"
-
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-
-using namespace llvm;
-
-static const std::string SSI_PHI = "SSI_phi";
-static const std::string SSI_SIG = "SSI_sigma";
-
-STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
-STATISTIC(NumPhiInserted, "Number of phi functions inserted");
-
-void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<DominanceFrontier>();
- AU.addRequiredTransitive<DominatorTree>();
- AU.setPreservesAll();
-}
-
-bool SSI::runOnFunction(Function &F) {
- DT_ = &getAnalysis<DominatorTree>();
- return false;
-}
-
-/// This methods creates the SSI representation for the list of values
-/// received. It will only create SSI representation if a value is used
-/// to decide a branch. Repeated values are created only once.
-///
-void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
- init(value);
-
- SmallPtrSet<Instruction*, 4> needConstruction;
- for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
- E = value.end(); I != E; ++I)
- if (created.insert(*I))
- needConstruction.insert(*I);
-
- insertSigmaFunctions(needConstruction);
-
- // Test if there is a need to transform to SSI
- if (!needConstruction.empty()) {
- insertPhiFunctions(needConstruction);
- renameInit(needConstruction);
- rename(DT_->getRoot());
- fixPhis();
- }
-
- clean();
-}
-
-/// Insert sigma functions (a sigma function is a phi function with one
-/// operator)
-///
-void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- for (Value::use_iterator begin = (*I)->use_begin(),
- end = (*I)->use_end(); begin != end; ++begin) {
- // Test if the Use of the Value is in a comparator
- if (CmpInst *CI = dyn_cast<CmpInst>(begin)) {
- // Iterates through all uses of CmpInst
- for (Value::use_iterator begin_ci = CI->use_begin(),
- end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) {
- // Test if any use of CmpInst is in a Terminator
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) {
- insertSigma(TI, *I);
- }
- }
- }
- }
- }
-}
-
-/// Inserts Sigma Functions in every BasicBlock successor to Terminator
-/// Instruction TI. All inserted Sigma Function are related to Instruction I.
-///
-void SSI::insertSigma(TerminatorInst *TI, Instruction *I) {
- // Basic Block of the Terminator Instruction
- BasicBlock *BB = TI->getParent();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
- // Next Basic Block
- BasicBlock *BB_next = TI->getSuccessor(i);
- if (BB_next != BB &&
- BB_next->getSinglePredecessor() != NULL &&
- dominateAny(BB_next, I)) {
- PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin());
- PN->addIncoming(I, BB);
- sigmas[PN] = I;
- created.insert(PN);
- defsites[I].push_back(BB_next);
- ++NumSigmaInserted;
- }
- }
-}
-
-/// Insert phi functions when necessary
-///
-void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {
- DominanceFrontier *DF = &getAnalysis<DominanceFrontier>();
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- // Test if there were any sigmas for this variable
- SmallPtrSet<BasicBlock *, 16> BB_visited;
-
- // Insert phi functions if there is any sigma function
- while (!defsites[*I].empty()) {
-
- BasicBlock *BB = defsites[*I].back();
-
- defsites[*I].pop_back();
- DominanceFrontier::iterator DF_BB = DF->find(BB);
-
- // The BB is unreachable. Skip it.
- if (DF_BB == DF->end())
- continue;
-
- // Iterates through all the dominance frontier of BB
- for (std::set<BasicBlock *>::iterator DF_BB_begin =
- DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
- DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
- BasicBlock *BB_dominated = *DF_BB_begin;
-
- // Test if has not yet visited this node and if the
- // original definition dominates this node
- if (BB_visited.insert(BB_dominated) &&
- DT_->properlyDominates(value_original[*I], BB_dominated) &&
- dominateAny(BB_dominated, *I)) {
- PHINode *PN = PHINode::Create(
- (*I)->getType(), SSI_PHI, BB_dominated->begin());
- phis.insert(std::make_pair(PN, *I));
- created.insert(PN);
-
- defsites[*I].push_back(BB_dominated);
- ++NumPhiInserted;
- }
- }
- }
- BB_visited.clear();
- }
-}
-
-/// Some initialization for the rename part
-///
-void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I)
- value_stack[*I].push_back(*I);
-}
-
-/// Renames all variables in the specified BasicBlock.
-/// Only variables that need to be rename will be.
-///
-void SSI::rename(BasicBlock *BB) {
- SmallPtrSet<Instruction*, 8> defined;
-
- // Iterate through instructions and make appropriate renaming.
- // For SSI_PHI (b = PHI()), store b at value_stack as a new
- // definition of the variable it represents.
- // For SSI_SIG (b = PHI(a)), substitute a with the current
- // value of a, present in the value_stack.
- // Then store bin the value_stack as the new definition of a.
- // For all other instructions (b = OP(a, c, d, ...)), we need to substitute
- // all operands with its current value, present in value_stack.
- for (BasicBlock::iterator begin = BB->begin(), end = BB->end();
- begin != end; ++begin) {
- Instruction *I = begin;
- if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
- Instruction* position;
-
- // Treat SSI_PHI
- if ((position = getPositionPhi(PN))) {
- value_stack[position].push_back(PN);
- defined.insert(position);
- // Treat SSI_SIG
- } else if ((position = getPositionSigma(PN))) {
- substituteUse(I);
- value_stack[position].push_back(PN);
- defined.insert(position);
- }
-
- // Treat all other PHI functions
- else {
- substituteUse(I);
- }
- }
-
- // Treat all other functions
- else {
- substituteUse(I);
- }
- }
-
- // This loop iterates in all BasicBlocks that are successors of the current
- // BasicBlock. For each SSI_PHI instruction found, insert an operand.
- // This operand is the current operand in value_stack for the variable
- // in "position". And the BasicBlock this operand represents is the current
- // BasicBlock.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
- BasicBlock *BB_succ = *SI;
-
- for (BasicBlock::iterator begin = BB_succ->begin(),
- notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
- Instruction *I = begin;
- PHINode *PN = dyn_cast<PHINode>(I);
- Instruction* position;
- if (PN && ((position = getPositionPhi(PN)))) {
- PN->addIncoming(value_stack[position].back(), BB);
- }
- }
- }
-
- // This loop calls rename on all children from this block. This time children
- // refers to a successor block in the dominance tree.
- DomTreeNode *DTN = DT_->getNode(BB);
- for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end();
- begin != end; ++begin) {
- DomTreeNodeBase<BasicBlock> *DTN_children = *begin;
- BasicBlock *BB_children = DTN_children->getBlock();
- rename(BB_children);
- }
-
- // Now we remove all inserted definitions of a variable from the top of
- // the stack leaving the previous one as the top.
- for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
- DE = defined.end(); DI != DE; ++DI)
- value_stack[*DI].pop_back();
-}
-
-/// Substitute any use in this instruction for the last definition of
-/// the variable
-///
-void SSI::substituteUse(Instruction *I) {
- for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
- Value *operand = I->getOperand(i);
- for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator
- VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) {
- if (operand == VI->second.front() &&
- I != VI->second.back()) {
- PHINode *PN_I = dyn_cast<PHINode>(I);
- PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());
-
- // If a phi created in a BasicBlock is used as an operand of another
- // created in the same BasicBlock, this step marks this second phi,
- // to fix this issue later. It cannot be fixed now, because the
- // operands of the first phi are not final yet.
- if (PN_I && PN_vs &&
- VI->second.back()->getParent() == I->getParent()) {
-
- phisToFix.insert(PN_I);
- }
-
- I->setOperand(i, VI->second.back());
- break;
- }
- }
- }
-}
-
-/// Test if the BasicBlock BB dominates any use or definition of value.
-/// If it dominates a phi instruction that is on the same BasicBlock,
-/// that does not count.
-///
-bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
- for (Value::use_iterator begin = value->use_begin(),
- end = value->use_end(); begin != end; ++begin) {
- Instruction *I = cast<Instruction>(*begin);
- BasicBlock *BB_father = I->getParent();
- if (BB == BB_father && isa<PHINode>(I))
- continue;
- if (DT_->dominates(BB, BB_father)) {
- return true;
- }
- }
- return false;
-}
-
-/// When there is a phi node that is created in a BasicBlock and it is used
-/// as an operand of another phi function used in the same BasicBlock,
-/// LLVM looks this as an error. So on the second phi, the first phi is called
-/// P and the BasicBlock it incomes is B. This P will be replaced by the value
-/// it has for BasicBlock B. It also includes undef values for predecessors
-/// that were not included in the phi.
-///
-void SSI::fixPhis() {
- for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),
- end = phisToFix.end(); begin != end; ++begin) {
- PHINode *PN = *begin;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
- PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i));
- if (PN_father && PN->getParent() == PN_father->getParent() &&
- !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {
- BasicBlock *BB = PN->getIncomingBlock(i);
- int pos = PN_father->getBasicBlockIndex(BB);
- PN->setIncomingValue(i, PN_father->getIncomingValue(pos));
- }
- }
- }
-
- for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(),
- end = phis.end(); begin != end; ++begin) {
- PHINode *PN = begin->first;
- BasicBlock *BB = PN->getParent();
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- SmallVector<BasicBlock*, 8> Preds(PI, PE);
- for (unsigned size = Preds.size();
- PI != PE && PN->getNumIncomingValues() != size; ++PI) {
- bool found = false;
- for (unsigned i = 0, pn_end = PN->getNumIncomingValues();
- i < pn_end; ++i) {
- if (PN->getIncomingBlock(i) == *PI) {
- found = true;
- break;
- }
- }
- if (!found) {
- PN->addIncoming(UndefValue::get(PN->getType()), *PI);
- }
- }
- }
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the phis list.
-///
-Instruction* SSI::getPositionPhi(PHINode *PN) {
- DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);
- if (val == phis.end())
- return 0;
- else
- return val->second;
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the sigmas list.
-///
-Instruction* SSI::getPositionSigma(PHINode *PN) {
- DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);
- if (val == sigmas.end())
- return 0;
- else
- return val->second;
-}
-
-/// Initializes
-///
-void SSI::init(SmallVectorImpl<Instruction *> &value) {
- for (SmallVectorImpl<Instruction *>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- value_original[*I] = (*I)->getParent();
- defsites[*I].push_back((*I)->getParent());
- }
-}
-
-/// Clean all used resources in this creation of SSI
-///
-void SSI::clean() {
- phis.clear();
- sigmas.clear();
- phisToFix.clear();
-
- defsites.clear();
- value_stack.clear();
- value_original.clear();
-}
-
-/// createSSIPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIPass() { return new SSI(); }
-
-char SSI::ID = 0;
-static RegisterPass<SSI> X("ssi", "Static Single Information Construction");
-
-/// SSIEverything - A pass that runs createSSI on every non-void variable,
-/// intended for debugging.
-namespace {
- struct SSIEverything : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- SSIEverything() : FunctionPass(&ID) {}
-
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SSI>();
- }
- };
-}
-
-bool SSIEverything::runOnFunction(Function &F) {
- SmallVector<Instruction *, 16> Insts;
- SSI &ssi = getAnalysis<SSI>();
-
- if (F.isDeclaration() || F.isIntrinsic()) return false;
-
- for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
- for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
- if (!I->getType()->isVoidTy())
- Insts.push_back(I);
-
- ssi.createSSI(Insts);
- return true;
-}
-
-/// createSSIEverythingPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); }
-
-char SSIEverything::ID = 0;
-static RegisterPass<SSIEverything>
-Y("ssi-everything", "Static Single Information Construction");
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 27b07d9731a5..28d7afbf1c33 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -949,7 +949,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
UI != E; ++UI) {
// Ignore any user that is not a PHI node in BB2. These can only occur in
// unreachable blocks, because they would not be dominated by the instr.
- PHINode *PN = dyn_cast<PHINode>(UI);
+ PHINode *PN = dyn_cast<PHINode>(*UI);
if (!PN || PN->getParent() != BB2)
return false;
PHIUses.push_back(PN);
@@ -1724,12 +1724,12 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
assert(BB && BB->getParent() && "Block not embedded in function!");
assert(BB->getTerminator() && "Degenerate basic block encountered!");
- assert(&BB->getParent()->getEntryBlock() != BB &&
- "Can't Simplify entry block!");
- // Remove basic blocks that have no predecessors... or that just have themself
- // as a predecessor. These are unreachable.
- if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
+ // Remove basic blocks that have no predecessors (except the entry block)...
+ // or that just have themself as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ &BB->getParent()->getEntryBlock() != BB) ||
+ BB->getSinglePredecessor() == BB) {
DEBUG(dbgs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
return true;
@@ -1880,8 +1880,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
while (isa<DbgInfoIntrinsic>(BBI))
++BBI;
if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
- return true;
+ if (BB != &BB->getParent()->getEntryBlock())
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
} else { // Conditional branch
if (isValueEqualityComparison(BI)) {
@@ -2049,12 +2050,38 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
}
// If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB)) {
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
M->getBasicBlockList().erase(BB);
return true;
}
}
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ } else if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ }
}
// Merge basic blocks into their predecessor if there is only one distinct
@@ -2068,12 +2095,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// is a conditional branch, see if we can hoist any code from this block up
// into our predecessor.
pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- BasicBlock *OnlyPred = *PI++;
- for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
- if (*PI != OnlyPred) {
+ BasicBlock *OnlyPred = 0;
+ for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
+ if (!OnlyPred)
+ OnlyPred = *PI;
+ else if (*PI != OnlyPred) {
OnlyPred = 0; // There are multiple different predecessors...
break;
}
+ }
if (OnlyPred)
if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
@@ -2172,8 +2202,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
-/// WARNING: The entry node of a function may not be simplified.
-///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
return SimplifyCFGOpt(TD).run(BB);
}
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 3fa8b70a8505..a51f1e1a47f6 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -24,8 +24,8 @@
using namespace llvm;
char UnifyFunctionExitNodes::ID = 0;
-static RegisterPass<UnifyFunctionExitNodes>
-X("mergereturn", "Unify function exit nodes");
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false);
Pass *llvm::createUnifyFunctionExitNodesPass() {
return new UnifyFunctionExitNodes();
@@ -35,7 +35,7 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 3f6a90c94ebb..fc4bde77d4f9 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -20,28 +20,51 @@
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
+ bool ModuleLevelChanges) {
Value *&VMSlot = VM[V];
if (VMSlot) return VMSlot; // Does it exist in the map yet?
// NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
// DenseMap. This includes any recursive calls to MapValue.
- // Global values and non-function-local metadata do not need to be seeded into
- // the VM if they are using the identity mapping.
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
- (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
+ (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
+ !ModuleLevelChanges))
return VMSlot = const_cast<Value*>(V);
if (const MDNode *MD = dyn_cast<MDNode>(V)) {
- SmallVector<Value*, 4> Elts;
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
- Elts.push_back(MD->getOperand(i) ? MapValue(MD->getOperand(i), VM) : 0);
- return VM[V] = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ // Start by assuming that we'll use the identity mapping.
+ VMSlot = const_cast<Value*>(V);
+
+ // Check all operands to see if any need to be remapped.
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+ Value *OP = MD->getOperand(i);
+ if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+
+ // Ok, at least one operand needs remapping.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+ VM[V] = Dummy;
+ SmallVector<Value*, 4> Elts;
+ Elts.reserve(MD->getNumOperands());
+ for (i = 0; i != e; ++i)
+ Elts.push_back(MD->getOperand(i) ?
+ MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ Dummy->replaceAllUsesWith(NewMD);
+ MDNode::deleteTemporary(Dummy);
+ return VM[V] = NewMD;
+ }
+
+ // No operands needed remapping; keep the identity map.
+ return const_cast<Value*>(V);
}
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
@@ -51,7 +74,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This array must contain a reference to a global, make a new array
// and return it.
@@ -62,7 +85,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantArray::get(CA->getType(), Values);
}
}
@@ -72,7 +96,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This struct must contain a reference to a global, make a new struct
// and return it.
@@ -83,7 +107,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantStruct::get(CS->getType(), Values);
}
}
@@ -93,14 +118,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
std::vector<Constant*> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+ Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
return VM[V] = CE->getWithOperands(Ops);
}
if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This vector value must contain a reference to a global, make a new
// vector constant and return it.
@@ -111,7 +136,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantVector::get(Values);
}
}
@@ -119,19 +145,33 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
}
BlockAddress *BA = cast<BlockAddress>(C);
- Function *F = cast<Function>(MapValue(BA->getFunction(), VM));
- BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM));
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
+ ModuleLevelChanges));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
+ ModuleLevelChanges));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges) {
+ // Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap);
+ Value *V = MapValue(*op, VMap, ModuleLevelChanges);
assert(V && "Referenced value not in value map!");
*op = V;
}
-}
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ Value *Old = MI->second;
+ Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+ if (New != Old)
+ I->setMetadata(MI->first, cast<MDNode>(New));
+ }
+}
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 09b8aa507d83..831a9960463d 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -16,7 +16,7 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#include "llvm/LLVMContext.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
@@ -63,8 +63,6 @@ static const Module *getModuleFromVal(const Value *V) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return GV->getParent();
- if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V))
- return NMD->getParent();
return 0;
}
@@ -230,7 +228,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
E = STy->element_end(); I != E; ++I) {
OS << ' ';
CalcTypeName(*I, TypeStack, OS);
- if (next(I) == STy->element_end())
+ if (llvm::next(I) == STy->element_end())
OS << ' ';
else
OS << ',';
@@ -240,21 +238,6 @@ void TypePrinting::CalcTypeName(const Type *Ty,
OS << '>';
break;
}
- case Type::UnionTyID: {
- const UnionType *UTy = cast<UnionType>(Ty);
- OS << "union {";
- for (StructType::element_iterator I = UTy->element_begin(),
- E = UTy->element_end(); I != E; ++I) {
- OS << ' ';
- CalcTypeName(*I, TypeStack, OS);
- if (next(I) == UTy->element_end())
- OS << ' ';
- else
- OS << ',';
- }
- OS << '}';
- break;
- }
case Type::PointerTyID: {
const PointerType *PTy = cast<PointerType>(Ty);
CalcTypeName(PTy->getElementType(), TypeStack, OS);
@@ -581,8 +564,12 @@ static SlotTracker *createSlotTracker(const Value *V) {
if (const Function *Func = dyn_cast<Function>(V))
return new SlotTracker(Func);
- if (isa<MDNode>(V))
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ if (!MD->isFunctionLocal())
+ return new SlotTracker(MD->getFunction());
+
return new SlotTracker((Function *)0);
+ }
return 0;
}
@@ -634,10 +621,8 @@ void SlotTracker::processModule() {
I = TheModule->named_metadata_begin(),
E = TheModule->named_metadata_end(); I != E; ++I) {
const NamedMDNode *NMD = I;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- if (MDNode *MD = NMD->getOperand(i))
- CreateMetadataSlot(MD);
- }
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ CreateMetadataSlot(NMD->getOperand(i));
}
// Add all the unnamed functions to the table.
@@ -778,15 +763,14 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
// Don't insert if N is a function-local metadata, these are always printed
// inline.
- if (N->isFunctionLocal())
- return;
-
- mdn_iterator I = mdnMap.find(N);
- if (I != mdnMap.end())
- return;
+ if (!N->isFunctionLocal()) {
+ mdn_iterator I = mdnMap.find(N);
+ if (I != mdnMap.end())
+ return;
- unsigned DestSlot = mdnNext++;
- mdnMap[N] = DestSlot;
+ unsigned DestSlot = mdnNext++;
+ mdnMap[N] = DestSlot;
+ }
// Recursively add any MDNodes referenced by operands.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -800,7 +784,8 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
TypePrinting *TypePrinter,
- SlotTracker *Machine);
+ SlotTracker *Machine,
+ const Module *Context);
@@ -856,7 +841,8 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
TypePrinting &TypePrinter,
- SlotTracker *Machine) {
+ SlotTracker *Machine,
+ const Module *Context) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
if (CI->getType()->isIntegerTy(1)) {
Out << (CI->getZExtValue() ? "true" : "false");
@@ -972,9 +958,11 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
Out << "blockaddress(";
- WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+ Context);
Out << ", ";
- WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+ Context);
Out << ")";
return;
}
@@ -994,12 +982,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
TypePrinter.print(ETy, Out);
Out << ' ';
WriteAsOperandInternal(Out, CA->getOperand(0),
- &TypePrinter, Machine);
+ &TypePrinter, Machine,
+ Context);
for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
}
}
Out << ']';
@@ -1017,14 +1007,16 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
TypePrinter.print(CS->getOperand(0)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+ Context);
for (unsigned i = 1; i < N; i++) {
Out << ", ";
TypePrinter.print(CS->getOperand(i)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+ Context);
}
Out << ' ';
}
@@ -1035,15 +1027,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
- if (const ConstantUnion *CU = dyn_cast<ConstantUnion>(CV)) {
- Out << "{ ";
- TypePrinter.print(CU->getOperand(0)->getType(), Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CU->getOperand(0), &TypePrinter, Machine);
- Out << " }";
- return;
- }
-
if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
const Type *ETy = CP->getType()->getElementType();
assert(CP->getNumOperands() > 0 &&
@@ -1051,12 +1034,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << '<';
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine,
+ Context);
for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine,
+ Context);
}
Out << '>';
return;
@@ -1087,7 +1072,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
TypePrinter.print((*OI)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
if (OI+1 != CE->op_end())
Out << ", ";
}
@@ -1112,7 +1097,8 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
TypePrinting *TypePrinter,
- SlotTracker *Machine) {
+ SlotTracker *Machine,
+ const Module *Context) {
Out << "!{";
for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
const Value *V = Node->getOperand(mi);
@@ -1122,7 +1108,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
TypePrinter->print(V->getType(), Out);
Out << ' ';
WriteAsOperandInternal(Out, Node->getOperand(mi),
- TypePrinter, Machine);
+ TypePrinter, Machine, Context);
}
if (mi + 1 != me)
Out << ", ";
@@ -1138,7 +1124,8 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
///
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
TypePrinting *TypePrinter,
- SlotTracker *Machine) {
+ SlotTracker *Machine,
+ const Module *Context) {
if (V->hasName()) {
PrintLLVMName(Out, V);
return;
@@ -1147,7 +1134,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
const Constant *CV = dyn_cast<Constant>(V);
if (CV && !isa<GlobalValue>(CV)) {
assert(TypePrinter && "Constants require TypePrinting!");
- WriteConstantInternal(Out, CV, *TypePrinter, Machine);
+ WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
return;
}
@@ -1168,12 +1155,16 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
if (const MDNode *N = dyn_cast<MDNode>(V)) {
if (N->isFunctionLocal()) {
// Print metadata inline, not via slot reference number.
- WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine);
+ WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
return;
}
- if (!Machine)
- Machine = createSlotTracker(V);
+ if (!Machine) {
+ if (N->isFunctionLocal())
+ Machine = new SlotTracker(N->getFunction());
+ else
+ Machine = new SlotTracker(Context);
+ }
Out << '!' << Machine->getMetadataSlot(N);
return;
}
@@ -1227,8 +1218,9 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
// Fast path: Don't construct and populate a TypePrinting object if we
// won't be needing any types printed.
if (!PrintType &&
- (!isa<Constant>(V) || V->hasName() || isa<GlobalValue>(V))) {
- WriteAsOperandInternal(Out, V, 0, 0);
+ ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+ V->hasName() || isa<GlobalValue>(V))) {
+ WriteAsOperandInternal(Out, V, 0, 0, Context);
return;
}
@@ -1242,7 +1234,7 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
Out << ' ';
}
- WriteAsOperandInternal(Out, V, &TypePrinter, 0);
+ WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
}
namespace {
@@ -1297,7 +1289,7 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
TypePrinter.print(Operand->getType(), Out);
Out << ' ';
}
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
void AssemblyWriter::writeParamOperand(const Value *Operand,
@@ -1314,7 +1306,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
Out << ' ' << Attribute::getAsString(Attrs);
Out << ' ';
// Print the operand
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
void AssemblyWriter::printModule(const Module *M) {
@@ -1403,10 +1395,7 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
Out << "!" << NMD->getName() << " = !{";
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
if (i) Out << ", ";
- if (MDNode *MD = NMD->getOperand(i))
- Out << '!' << Machine.getMetadataSlot(MD);
- else
- Out << "null";
+ Out << '!' << Machine.getMetadataSlot(NMD->getOperand(i));
}
Out << "}\n";
}
@@ -1421,6 +1410,9 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "linker_private_weak ";
break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "linker_private_weak_def_auto ";
+ break;
case GlobalValue::InternalLinkage: Out << "internal "; break;
case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
@@ -1451,7 +1443,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
if (GV->isMaterializable())
Out << "; Materializable\n";
- WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
Out << " = ";
if (!GV->hasInitializer() && GV->hasExternalLinkage())
@@ -1510,7 +1502,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
TypePrinter.print(F->getFunctionType(), Out);
Out << "* ";
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
} else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
TypePrinter.print(GA->getType(), Out);
Out << ' ';
@@ -1593,7 +1585,7 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
TypePrinter.print(F->getReturnType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
Out << '(';
Machine.incorporateFunction(F);
@@ -1643,11 +1635,10 @@ void AssemblyWriter::printFunction(const Function *F) {
if (F->hasGC())
Out << " gc \"" << F->getGC() << '"';
if (F->isDeclaration()) {
- Out << "\n";
+ Out << '\n';
} else {
Out << " {";
-
- // Output all of its basic blocks... for the function
+ // Output all of the function's basic blocks.
for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
printBasicBlock(I);
@@ -1696,7 +1687,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
Out.PadToColumn(50);
Out << "; Error: Block without parent!";
} else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
- // Output predecessors for the block...
+ // Output predecessors for the block.
Out.PadToColumn(50);
Out << ";";
const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
@@ -1734,13 +1725,6 @@ void AssemblyWriter::printInfoComment(const Value &V) {
AnnotationWriter->printInfoComment(V, Out);
return;
}
-
- if (V.getType()->isVoidTy()) return;
-
- Out.PadToColumn(50);
- Out << "; <";
- TypePrinter.print(V.getType(), Out);
- Out << "> [#uses=" << V.getNumUses() << ']'; // Output # uses
}
// This member is called for each Instruction in a function..
@@ -2029,7 +2013,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
} else {
Out << ", !<unknown kind #" << Kind << ">";
}
- Out << " !" << Machine.getMetadataSlot(InstMD[i].second);
+ Out << ' ';
+ WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
+ TheModule);
}
}
printInfoComment(I);
@@ -2077,7 +2063,7 @@ void AssemblyWriter::writeAllMDNodes() {
}
void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
- WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine);
+ WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
WriteMDNodeComment(Node, Out);
Out << "\n";
}
@@ -2093,6 +2079,13 @@ void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
W.printModule(this);
}
+void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+ W.printNamedMDNode(this);
+}
+
void Type::print(raw_ostream &OS) const {
if (this == 0) {
OS << "<null Type>";
@@ -2130,15 +2123,11 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
SlotTracker SlotTable(F);
AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
W.printMDNodeBody(N);
- } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
- SlotTracker SlotTable(N->getParent());
- AssemblyWriter W(OS, SlotTable, N->getParent(), AAW);
- W.printNamedMDNode(N);
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
OS << ' ';
- WriteConstantInternal(OS, C, TypePrinter, 0);
+ WriteConstantInternal(OS, C, TypePrinter, 0, 0);
} else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
isa<Argument>(this)) {
WriteAsOperand(OS, this, true, 0);
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index dc39024e3945..9330e141c341 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -78,6 +78,63 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = F;
return true;
}
+ } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+ if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
+ Name.compare(14, 5, "vaddl", 5) == 0 ||
+ Name.compare(14, 5, "vsubl", 5) == 0 ||
+ Name.compare(14, 5, "vaddw", 5) == 0 ||
+ Name.compare(14, 5, "vsubw", 5) == 0 ||
+ Name.compare(14, 5, "vmull", 5) == 0 ||
+ Name.compare(14, 5, "vmlal", 5) == 0 ||
+ Name.compare(14, 5, "vmlsl", 5) == 0 ||
+ Name.compare(14, 5, "vabdl", 5) == 0 ||
+ Name.compare(14, 5, "vabal", 5) == 0) &&
+ (Name.compare(19, 2, "s.", 2) == 0 ||
+ Name.compare(19, 2, "u.", 2) == 0)) ||
+
+ (Name.compare(14, 4, "vaba", 4) == 0 &&
+ (Name.compare(18, 2, "s.", 2) == 0 ||
+ Name.compare(18, 2, "u.", 2) == 0)) ||
+
+ (Name.compare(14, 6, "vmovn.", 6) == 0)) {
+
+ // Calls to these are transformed into IR without intrinsics.
+ NewFn = 0;
+ return true;
+ }
+ // Old versions of NEON ld/st intrinsics are missing alignment arguments.
+ bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
+ bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
+ if (isVLd || isVSt) {
+ unsigned NumVecs = Name.at(17) - '0';
+ if (NumVecs == 0 || NumVecs > 4)
+ return false;
+ bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
+ if (!isLaneOp && Name.at(18) != '.')
+ return false;
+ unsigned ExpectedArgs = 2; // for the address and alignment
+ if (isVSt || isLaneOp)
+ ExpectedArgs += NumVecs;
+ if (isLaneOp)
+ ExpectedArgs += 1; // for the lane number
+ unsigned NumP = FTy->getNumParams();
+ if (NumP != ExpectedArgs - 1)
+ return false;
+
+ // Change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name.
+ F->setName("");
+
+ // One argument is missing: add the alignment argument.
+ std::vector<const Type*> NewParams;
+ for (unsigned p = 0; p < NumP; ++p)
+ NewParams.push_back(FTy->getParamType(p));
+ NewParams.push_back(Type::getInt32Ty(F->getContext()));
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
+ NewParams, false);
+ NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
+ return true;
+ }
}
break;
case 'b':
@@ -182,7 +239,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFnName = "llvm.memset.p0i8.i64";
}
if (NewFnName) {
- const FunctionType *FTy = F->getFunctionType();
NewFn = cast<Function>(M->getOrInsertFunction(NewFnName,
FTy->getReturnType(),
FTy->getParamType(0),
@@ -309,6 +365,73 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
return Upgraded;
}
+bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+ StringRef Name(GV->getName());
+
+ // We are only upgrading one symbol here.
+ if (Name == ".llvm.eh.catch.all.value") {
+ GV->setName("llvm.eh.catch.all.value");
+ return true;
+ }
+
+ return false;
+}
+
+/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
+/// have vector elements twice as big as one or both source operands, do the
+/// sign- or zero-extension that used to be handled by intrinsics. The
+/// extended values are returned via V0 and V1.
+static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
+ Value *&V0, Value *&V1) {
+ Function *F = CI->getCalledFunction();
+ const std::string& Name = F->getName();
+ bool isLong = (Name.at(18) == 'l');
+ bool isSigned = (Name.at(19) == 's');
+
+ if (isSigned) {
+ if (isLong)
+ V0 = new SExtInst(Arg0, CI->getType(), "", CI);
+ else
+ V0 = Arg0;
+ V1 = new SExtInst(Arg1, CI->getType(), "", CI);
+ } else {
+ if (isLong)
+ V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
+ else
+ V0 = Arg0;
+ V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
+ }
+}
+
+/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
+/// or vabal intrinsics, construct a call to a vabd intrinsic. Examine the
+/// name of the old intrinsic to determine whether to use a signed or unsigned
+/// vabd intrinsic. Get the type from the old call instruction, adjusted for
+/// half-size vector elements if the old intrinsic was vabdl or vabal.
+static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
+ Function *F = CI->getCalledFunction();
+ const std::string& Name = F->getName();
+ bool isLong = (Name.at(18) == 'l');
+ bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
+
+ Intrinsic::ID intID;
+ if (isSigned)
+ intID = Intrinsic::arm_neon_vabds;
+ else
+ intID = Intrinsic::arm_neon_vabdu;
+
+ const Type *Ty = CI->getType();
+ if (isLong)
+ Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
+
+ Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
+ Value *Operands[2];
+ Operands[0] = Arg0;
+ Operands[1] = Arg1;
+ return CallInst::Create(VABD, Operands, Operands+2,
+ "upgraded."+CI->getName(), CI);
+}
+
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
@@ -320,6 +443,60 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
assert(F && "CallInst has no function associated with it.");
if (!NewFn) {
+ // Get the Function's name.
+ const std::string& Name = F->getName();
+
+ // Upgrade ARM NEON intrinsics.
+ if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+ Instruction *NewI;
+ Value *V0, *V1;
+ if (Name.compare(14, 7, "vmovls.", 7) == 0) {
+ NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
+ "upgraded." + CI->getName(), CI);
+ } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
+ NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
+ "upgraded." + CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vadd", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+ NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vsub", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+ NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
+ } else if (Name.compare(14, 4, "vmul", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+ NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
+ } else if (Name.compare(14, 4, "vmla", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
+ Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
+ NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
+ "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vmls", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
+ Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
+ NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
+ "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vabd", 4) == 0) {
+ NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
+ NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vaba", 4) == 0) {
+ NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
+ if (Name.at(18) == 'l')
+ NewI = new ZExtInst(NewI, CI->getType(), "", CI);
+ NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
+ "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
+ NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
+ "upgraded." + CI->getName(), CI);
+ } else {
+ llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
+ }
+ // Replace any uses of the old CallInst.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
bool isLoadH = false, isLoadL = false, isMovL = false;
bool isMovSD = false, isShufPD = false;
bool isUnpckhPD = false, isUnpcklPD = false;
@@ -398,7 +575,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
} else if (isShufPD) {
Value *Op1 = CI->getArgOperand(1);
- unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ unsigned MaskVal =
+ cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
((MaskVal >> 1) & 1)+2));
@@ -547,7 +725,40 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
switch (NewFn->getIntrinsicID()) {
- default: llvm_unreachable("Unknown function for CallInst upgrade.");
+ default: llvm_unreachable("Unknown function for CallInst upgrade.");
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ // Add a default alignment argument of 1.
+ SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
+ Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
+ CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty())
+ // Replace all uses of the old call with the new cast which has the
+ // correct type.
+ CI->replaceAllUsesWith(NewCI);
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ break;
+ }
+
case Intrinsic::x86_mmx_psll_d:
case Intrinsic::x86_mmx_psll_q:
case Intrinsic::x86_mmx_psll_w:
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index c64564b8a1e7..1388c93cce39 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMCore
Module.cpp
Pass.cpp
PassManager.cpp
+ PassRegistry.cpp
PrintModulePass.cpp
Type.cpp
TypeSymbolTable.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 35672661e445..9a91dafab2ff 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -357,22 +357,6 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
}
}
- if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
- unsigned NumElems = UTy->getNumElements();
- // Check for a union with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(UTy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(UTy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberSize;
- }
-
// Pointer size doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -438,24 +422,6 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
return MemberAlign;
}
- if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
- // Union alignment is the maximum alignment of any member.
- // Without target data, we can't compare much, but we can check to see
- // if all the members have the same alignment.
- unsigned NumElems = UTy->getNumElements();
- // Check for a union with all members having the same alignment.
- Constant *MemberAlign =
- getFoldedAlignOf(UTy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberAlign != getFoldedAlignOf(UTy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberAlign;
- }
-
// Pointer alignment doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -909,8 +875,6 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
unsigned numOps;
if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
- else if (AggTy->isUnionTy())
- numOps = 1;
else
numOps = cast<StructType>(AggTy)->getNumElements();
@@ -927,10 +891,6 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
if (const StructType* ST = dyn_cast<StructType>(AggTy))
return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
- if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) {
- assert(Ops.size() == 1 && "Union can only contain a single value!");
- return ConstantUnion::get(UT, Ops[0]);
- }
return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 00b009401dcc..16eaca81048b 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -59,7 +59,6 @@ Constant *Constant::getNullValue(const Type *Ty) {
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
case Type::StructTyID:
- case Type::UnionTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
return ConstantAggregateZero::get(Ty);
@@ -526,6 +525,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
bool AddNull) {
std::vector<Constant*> ElementVals;
+ ElementVals.reserve(Str.size() + size_t(AddNull));
for (unsigned i = 0; i < Str.size(); ++i)
ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
@@ -586,27 +586,6 @@ Constant* ConstantStruct::get(LLVMContext &Context,
return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
}
-ConstantUnion::ConstantUnion(const UnionType *T, Constant* V)
- : Constant(T, ConstantUnionVal,
- OperandTraits<ConstantUnion>::op_end(this) - 1, 1) {
- Use *OL = OperandList;
- assert(T->getElementTypeIndex(V->getType()) >= 0 &&
- "Initializer for union element isn't a member of union type!");
- *OL = V;
-}
-
-// ConstantUnion accessors.
-Constant* ConstantUnion::get(const UnionType* T, Constant* V) {
- LLVMContextImpl* pImpl = T->getContext().pImpl;
-
- // Create a ConstantAggregateZero value if all elements are zeros...
- if (!V->isNullValue())
- return pImpl->UnionConstants.getOrCreate(T, V);
-
- return ConstantAggregateZero::get(T);
-}
-
-
ConstantVector::ConstantVector(const VectorType *T,
const std::vector<Constant*> &V)
: Constant(T, ConstantVectorVal,
@@ -723,7 +702,7 @@ bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
if (getOpcode() != Instruction::GetElementPtr) return false;
gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
- User::const_op_iterator OI = next(this->op_begin());
+ User::const_op_iterator OI = llvm::next(this->op_begin());
// Skip the first index, as it has no static limit.
++GEPI;
@@ -945,8 +924,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
// Factory Function Implementation
ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
- assert((Ty->isStructTy() || Ty->isUnionTy()
- || Ty->isArrayTy() || Ty->isVectorTy()) &&
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
@@ -956,14 +934,14 @@ ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantAggregateZero::destroyConstant() {
- getType()->getContext().pImpl->AggZeroConstants.remove(this);
+ getRawType()->getContext().pImpl->AggZeroConstants.remove(this);
destroyConstantImpl();
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantArray::destroyConstant() {
- getType()->getContext().pImpl->ArrayConstants.remove(this);
+ getRawType()->getContext().pImpl->ArrayConstants.remove(this);
destroyConstantImpl();
}
@@ -1027,21 +1005,14 @@ namespace llvm {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantStruct::destroyConstant() {
- getType()->getContext().pImpl->StructConstants.remove(this);
- destroyConstantImpl();
-}
-
-// destroyConstant - Remove the constant from the constant table...
-//
-void ConstantUnion::destroyConstant() {
- getType()->getContext().pImpl->UnionConstants.remove(this);
+ getRawType()->getContext().pImpl->StructConstants.remove(this);
destroyConstantImpl();
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantVector::destroyConstant() {
- getType()->getContext().pImpl->VectorConstants.remove(this);
+ getRawType()->getContext().pImpl->VectorConstants.remove(this);
destroyConstantImpl();
}
@@ -1082,7 +1053,7 @@ ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
- getType()->getContext().pImpl->NullPtrConstants.remove(this);
+ getRawType()->getContext().pImpl->NullPtrConstants.remove(this);
destroyConstantImpl();
}
@@ -1097,7 +1068,7 @@ UndefValue *UndefValue::get(const Type *Ty) {
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
- getType()->getContext().pImpl->UndefValueConstants.remove(this);
+ getRawType()->getContext().pImpl->UndefValueConstants.remove(this);
destroyConstantImpl();
}
@@ -1131,7 +1102,7 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
// destroyConstant - Remove the constant from the constant table.
//
void BlockAddress::destroyConstant() {
- getFunction()->getType()->getContext().pImpl
+ getFunction()->getRawType()->getContext().pImpl
->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
getBasicBlock()->AdjustBlockAddressRefCount(-1);
destroyConstantImpl();
@@ -1930,7 +1901,7 @@ Constant* ConstantExpr::getAShr(Constant* C1, Constant* C2) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantExpr::destroyConstant() {
- getType()->getContext().pImpl->ExprConstants.remove(this);
+ getRawType()->getContext().pImpl->ExprConstants.remove(this);
destroyConstantImpl();
}
@@ -1971,11 +1942,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
Constant *ToC = cast<Constant>(To);
- LLVMContext &Context = getType()->getContext();
- LLVMContextImpl *pImpl = Context.pImpl;
+ LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
- Lookup.first.first = getType();
+ Lookup.first.first = cast<ArrayType>(getRawType());
Lookup.second = this;
std::vector<Constant*> &Values = Lookup.first.second;
@@ -2009,7 +1979,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Constant *Replacement = 0;
if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getType());
+ Replacement = ConstantAggregateZero::get(getRawType());
} else {
// Check to see if we have this array type already.
bool Exists;
@@ -2060,7 +2030,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
- Lookup.first.first = getType();
+ Lookup.first.first = cast<StructType>(getRawType());
Lookup.second = this;
std::vector<Constant*> &Values = Lookup.first.second;
Values.reserve(getNumOperands()); // Build replacement struct.
@@ -2082,14 +2052,13 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
Values[OperandToUpdate] = ToC;
- LLVMContext &Context = getType()->getContext();
- LLVMContextImpl *pImpl = Context.pImpl;
+ LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
Constant *Replacement = 0;
if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getType());
+ Replacement = ConstantAggregateZero::get(getRawType());
} else {
- // Check to see if we have this array type already.
+ // Check to see if we have this struct type already.
bool Exists;
LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
pImpl->StructConstants.InsertOrGetItem(Lookup, Exists);
@@ -2118,56 +2087,6 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
destroyConstant();
}
-void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To,
- Use *U) {
- assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
- Constant *ToC = cast<Constant>(To);
-
- assert(U == OperandList && "Union constants can only have one use!");
- assert(getNumOperands() == 1 && "Union constants can only have one use!");
- assert(getOperand(0) == From && "ReplaceAllUsesWith broken!");
-
- std::pair<LLVMContextImpl::UnionConstantsTy::MapKey, ConstantUnion*> Lookup;
- Lookup.first.first = getType();
- Lookup.second = this;
- Lookup.first.second = ToC;
-
- LLVMContext &Context = getType()->getContext();
- LLVMContextImpl *pImpl = Context.pImpl;
-
- Constant *Replacement = 0;
- if (ToC->isNullValue()) {
- Replacement = ConstantAggregateZero::get(getType());
- } else {
- // Check to see if we have this union type already.
- bool Exists;
- LLVMContextImpl::UnionConstantsTy::MapTy::iterator I =
- pImpl->UnionConstants.InsertOrGetItem(Lookup, Exists);
-
- if (Exists) {
- Replacement = I->second;
- } else {
- // Okay, the new shape doesn't exist in the system yet. Instead of
- // creating a new constant union, inserting it, replaceallusesof'ing the
- // old with the new, then deleting the old... just update the current one
- // in place!
- pImpl->UnionConstants.MoveConstantToNewSlot(this, I);
-
- // Update to the new value.
- setOperand(0, ToC);
- return;
- }
- }
-
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- uncheckedReplaceAllUsesWith(Replacement);
-
- // Delete the old constant!
- destroyConstant();
-}
-
void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
@@ -2180,7 +2099,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Values.push_back(Val);
}
- Constant *Replacement = get(getType(), Values);
+ Constant *Replacement = get(cast<VectorType>(getRawType()), Values);
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
@@ -2227,7 +2146,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
&Indices[0], Indices.size());
} else if (isCast()) {
assert(getOperand(0) == From && "Cast only has one use!");
- Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
+ Replacement = ConstantExpr::getCast(getOpcode(), To, getRawType());
} else if (getOpcode() == Instruction::Select) {
Constant *C1 = getOperand(0);
Constant *C2 = getOperand(1);
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 2f2fac53f062..1c04c3e1987e 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -511,14 +511,6 @@ struct ConstantKeyData<ConstantStruct> {
}
};
-template<>
-struct ConstantKeyData<ConstantUnion> {
- typedef Constant* ValType;
- static ValType getValType(ConstantUnion *CU) {
- return cast<Constant>(CU->getOperand(0));
- }
-};
-
// ConstantPointerNull does not take extra "value" argument...
template<class ValType>
struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
@@ -757,9 +749,13 @@ public:
// If this constant is the representative element for its abstract type,
// update the AbstractTypeMap so that the representative element is I.
- if (C->getType()->isAbstract()) {
+ //
+ // This must use getRawType() because if the type is under refinement, we
+ // will get the refineAbstractType callback below, and we don't want to
+ // kick union find in on the constant.
+ if (C->getRawType()->isAbstract()) {
typename AbstractTypeMapTy::iterator ATI =
- AbstractTypeMap.find(C->getType());
+ AbstractTypeMap.find(cast<DerivedType>(C->getRawType()));
assert(ATI != AbstractTypeMap.end() &&
"Abstract type not in AbstractTypeMap?");
if (ATI->second == OldI)
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index ca1a399fe8aa..5aad19dd2a4a 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -22,6 +22,7 @@
#include "llvm/TypeSymbolTable.h"
#include "llvm/InlineAsm.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -155,8 +156,6 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMFunctionTypeKind;
case Type::StructTyID:
return LLVMStructTypeKind;
- case Type::UnionTyID:
- return LLVMUnionTypeKind;
case Type::ArrayTyID:
return LLVMArrayTypeKind;
case Type::PointerTyID:
@@ -315,34 +314,6 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isPacked();
}
-/*--.. Operations on union types ..........................................--*/
-
-LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
- unsigned ElementCount) {
- SmallVector<const Type*, 8> Tys;
- for (LLVMTypeRef *I = ElementTypes,
- *E = ElementTypes + ElementCount; I != E; ++I)
- Tys.push_back(unwrap(*I));
-
- return wrap(UnionType::get(&Tys[0], Tys.size()));
-}
-
-LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, unsigned ElementCount) {
- return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes,
- ElementCount);
-}
-
-unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) {
- return unwrap<UnionType>(UnionTy)->getNumElements();
-}
-
-void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) {
- UnionType *Ty = unwrap<UnionType>(UnionTy);
- for (FunctionType::param_iterator I = Ty->element_begin(),
- E = Ty->element_end(); I != E; ++I)
- *Dest++ = wrap(*I);
-}
-
/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
@@ -488,6 +459,14 @@ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
return wrap(unwrap<User>(Val)->getOperand(Index));
}
+void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
+ unwrap<User>(Val)->setOperand(Index, unwrap(Op));
+}
+
+int LLVMGetNumOperands(LLVMValueRef Val) {
+ return unwrap<User>(Val)->getNumOperands();
+}
+
/*--.. Operations on constants of any type .................................--*/
LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
@@ -619,10 +598,6 @@ LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
return wrap(ConstantVector::get(
unwrap<Constant>(ScalarConstantVals, Size), Size));
}
-LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val) {
- return wrap(ConstantUnion::get(unwrap<UnionType>(Ty), unwrap<Constant>(Val)));
-}
-
/*--.. Constant expressions ................................................--*/
LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
@@ -1060,6 +1035,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMLinkerPrivateLinkage;
case GlobalValue::LinkerPrivateWeakLinkage:
return LLVMLinkerPrivateWeakLinkage;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ return LLVMLinkerPrivateWeakDefAutoLinkage;
case GlobalValue::DLLImportLinkage:
return LLVMDLLImportLinkage;
case GlobalValue::DLLExportLinkage:
@@ -1113,6 +1090,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
case LLVMLinkerPrivateWeakLinkage:
GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
break;
+ case LLVMLinkerPrivateWeakDefAutoLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage);
+ break;
case LLVMDLLImportLinkage:
GV->setLinkage(GlobalValue::DLLImportLinkage);
break;
@@ -1515,6 +1495,14 @@ void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
unwrap(BBRef)->eraseFromParent();
}
+void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveBefore(unwrap(MovePos));
+}
+
+void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveAfter(unwrap(MovePos));
+}
+
/*--.. Operations on instructions ..........................................--*/
LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
@@ -2223,3 +2211,39 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
delete unwrap(MemBuf);
}
+
+
+/*===-- Pass Manager ------------------------------------------------------===*/
+
+LLVMPassManagerRef LLVMCreatePassManager() {
+ return wrap(new PassManager());
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
+}
+
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+ return unwrap<PassManager>(PM)->run(*unwrap(M));
+}
+
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doInitialization();
+}
+
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+ return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+}
+
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doFinalization();
+}
+
+void LLVMDisposePassManager(LLVMPassManagerRef PM) {
+ delete unwrap(PM);
+}
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 10a866fab622..f3dad824461d 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -51,8 +52,8 @@ TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
char DominatorTree::ID = 0;
-static RegisterPass<DominatorTree>
-E("domtree", "Dominator Tree Construction", true, true);
+INITIALIZE_PASS(DominatorTree, "domtree",
+ "Dominator Tree Construction", true, true);
bool DominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
@@ -105,8 +106,8 @@ bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
//===----------------------------------------------------------------------===//
char DominanceFrontier::ID = 0;
-static RegisterPass<DominanceFrontier>
-G("domfrontier", "Dominance Frontier Construction", true, true);
+INITIALIZE_PASS(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true);
void DominanceFrontier::verifyAnalysis() const {
if (!VerifyDomInfo) return;
@@ -122,36 +123,23 @@ void DominanceFrontier::verifyAnalysis() const {
// NewBB is split and now it has one successor. Update dominance frontier to
// reflect this change.
void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
- assert(NewBB->getTerminator()->getNumSuccessors() == 1
- && "NewBB should have a single successor!");
+ assert(NewBB->getTerminator()->getNumSuccessors() == 1 &&
+ "NewBB should have a single successor!");
BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0);
- SmallVector<BasicBlock*, 8> PredBlocks;
- for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB);
- PI != PE; ++PI)
- PredBlocks.push_back(*PI);
-
- if (PredBlocks.empty())
- // If NewBB does not have any predecessors then it is a entry block.
- // In this case, NewBB and its successor NewBBSucc dominates all
- // other blocks.
- return;
-
// NewBBSucc inherits original NewBB frontier.
DominanceFrontier::iterator NewBBI = find(NewBB);
- if (NewBBI != end()) {
- DominanceFrontier::DomSetType NewBBSet = NewBBI->second;
- DominanceFrontier::DomSetType NewBBSuccSet;
- NewBBSuccSet.insert(NewBBSet.begin(), NewBBSet.end());
- addBasicBlock(NewBBSucc, NewBBSuccSet);
- }
+ if (NewBBI != end())
+ addBasicBlock(NewBBSucc, NewBBI->second);
// If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the
- // DF(PredBlocks[0]) without the stuff that the new block does not dominate
+ // DF(NewBBSucc) without the stuff that the new block does not dominate
// a predecessor of.
DominatorTree &DT = getAnalysis<DominatorTree>();
- if (DT.dominates(NewBB, NewBBSucc)) {
- DominanceFrontier::iterator DFI = find(PredBlocks[0]);
+ DomTreeNode *NewBBNode = DT.getNode(NewBB);
+ DomTreeNode *NewBBSuccNode = DT.getNode(NewBBSucc);
+ if (DT.dominates(NewBBNode, NewBBSuccNode)) {
+ DominanceFrontier::iterator DFI = find(NewBBSucc);
if (DFI != end()) {
DominanceFrontier::DomSetType Set = DFI->second;
// Filter out stuff in Set that we do not dominate a predecessor of.
@@ -160,8 +148,10 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
bool DominatesPred = false;
for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI);
PI != E; ++PI)
- if (DT.dominates(NewBB, *PI))
+ if (DT.dominates(NewBBNode, DT.getNode(*PI))) {
DominatesPred = true;
+ break;
+ }
if (!DominatesPred)
Set.erase(SetI++);
else
@@ -186,50 +176,71 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
NewDFSet.insert(NewBBSucc);
addBasicBlock(NewBB, NewDFSet);
}
-
- // Now we must loop over all of the dominance frontiers in the function,
- // replacing occurrences of NewBBSucc with NewBB in some cases. All
- // blocks that dominate a block in PredBlocks and contained NewBBSucc in
- // their dominance frontier must be updated to contain NewBB instead.
- //
- for (Function::iterator FI = NewBB->getParent()->begin(),
- FE = NewBB->getParent()->end(); FI != FE; ++FI) {
- DominanceFrontier::iterator DFI = find(FI);
- if (DFI == end()) continue; // unreachable block.
-
- // Only consider nodes that have NewBBSucc in their dominator frontier.
- if (!DFI->second.count(NewBBSucc)) continue;
-
- // Verify whether this block dominates a block in predblocks. If not, do
- // not update it.
- bool BlockDominatesAny = false;
- for (SmallVectorImpl<BasicBlock*>::const_iterator BI = PredBlocks.begin(),
- BE = PredBlocks.end(); BI != BE; ++BI) {
- if (DT.dominates(FI, *BI)) {
- BlockDominatesAny = true;
+
+ // Now update dominance frontiers which either used to contain NewBBSucc
+ // or which now need to include NewBB.
+
+ // Collect the set of blocks which dominate a predecessor of NewBB or
+ // NewSuccBB and which don't dominate both. This is an initial
+ // approximation of the blocks whose dominance frontiers will need updates.
+ SmallVector<DomTreeNode *, 16> AllPredDoms;
+
+ // Compute the block which dominates both NewBBSucc and NewBB. This is
+ // the immediate dominator of NewBBSucc unless NewBB dominates NewBBSucc.
+ // The code below which climbs dominator trees will stop at this point,
+ // because from this point up, dominance frontiers are unaffected.
+ DomTreeNode *DominatesBoth = 0;
+ if (NewBBSuccNode) {
+ DominatesBoth = NewBBSuccNode->getIDom();
+ if (DominatesBoth == NewBBNode)
+ DominatesBoth = NewBBNode->getIDom();
+ }
+
+ // Collect the set of all blocks which dominate a predecessor of NewBB.
+ SmallPtrSet<DomTreeNode *, 8> NewBBPredDoms;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI)
+ for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
+ if (DTN == DominatesBoth)
break;
- }
+ if (!NewBBPredDoms.insert(DTN))
+ break;
+ AllPredDoms.push_back(DTN);
}
- // If NewBBSucc should not stay in our dominator frontier, remove it.
- // We remove it unless there is a predecessor of NewBBSucc that we
- // dominate, but we don't strictly dominate NewBBSucc.
- bool ShouldRemove = true;
- if ((BasicBlock*)FI == NewBBSucc || !DT.dominates(FI, NewBBSucc)) {
- // Okay, we know that PredDom does not strictly dominate NewBBSucc.
- // Check to see if it dominates any predecessors of NewBBSucc.
- for (pred_iterator PI = pred_begin(NewBBSucc),
- E = pred_end(NewBBSucc); PI != E; ++PI)
- if (DT.dominates(FI, *PI)) {
- ShouldRemove = false;
- break;
- }
+ // Collect the set of all blocks which dominate a predecessor of NewSuccBB.
+ SmallPtrSet<DomTreeNode *, 8> NewBBSuccPredDoms;
+ for (pred_iterator PI = pred_begin(NewBBSucc),
+ E = pred_end(NewBBSucc); PI != E; ++PI)
+ for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
+ if (DTN == DominatesBoth)
+ break;
+ if (!NewBBSuccPredDoms.insert(DTN))
+ break;
+ if (!NewBBPredDoms.count(DTN))
+ AllPredDoms.push_back(DTN);
}
-
- if (ShouldRemove)
- removeFromFrontier(DFI, NewBBSucc);
- if (BlockDominatesAny && (&*FI == NewBB || !DT.dominates(FI, NewBB)))
+
+ // Visit all relevant dominance frontiers and make any needed updates.
+ for (SmallVectorImpl<DomTreeNode *>::const_iterator I = AllPredDoms.begin(),
+ E = AllPredDoms.end(); I != E; ++I) {
+ DomTreeNode *DTN = *I;
+ iterator DFI = find((*I)->getBlock());
+
+ // Only consider nodes that have NewBBSucc in their dominator frontier.
+ if (DFI == end() || !DFI->second.count(NewBBSucc)) continue;
+
+ // If the block dominates a predecessor of NewBB but does not properly
+ // dominate NewBB itself, add NewBB to its dominance frontier.
+ if (NewBBPredDoms.count(DTN) &&
+ !DT.properlyDominates(DTN, NewBBNode))
addToFrontier(DFI, NewBB);
+
+ // If the block does not dominate a predecessor of NewBBSucc or
+ // properly dominates NewBBSucc itself, remove NewBBSucc from its
+ // dominance frontier.
+ if (!NewBBSuccPredDoms.count(DTN) ||
+ DT.properlyDominates(DTN, NewBBSuccNode))
+ removeFromFrontier(DFI, NewBBSucc);
}
}
@@ -343,3 +354,7 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
}
}
+void DominanceFrontierBase::dump() const {
+ print(dbgs());
+}
+
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index b758eb8702ae..96716eeb349b 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -102,7 +102,14 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
setVisibility(Src->getVisibility());
}
-
+void GlobalValue::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ Alignment = Log2_32(Align) + 1;
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
//===----------------------------------------------------------------------===//
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 0d2eca9c3dea..69f713b2c42c 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -164,7 +164,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
StringRef::iterator ConstraintEnd = std::find(I, E, ',');
if (ConstraintEnd == I || // Empty constraint like ",,"
- Info.Parse(std::string(I, ConstraintEnd), Result)) {
+ Info.Parse(StringRef(I, ConstraintEnd-I), Result)) {
Result.clear(); // Erroneous constraint?
break;
}
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 9792adaaa122..05bed4c64316 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -49,8 +49,8 @@ Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
// Out of line virtual method, so the vtable, etc has a home.
Instruction::~Instruction() {
assert(Parent == 0 && "Instruction still linked in the program!");
- if (hasMetadata())
- removeAllMetadata();
+ if (hasMetadataHashEntry())
+ clearMetadataHashEntries();
}
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index c13696f22902..401802ed13d5 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -33,10 +33,8 @@ using namespace llvm;
User::op_iterator CallSite::getCallee() const {
Instruction *II(getInstruction());
return isCall()
- ? (CallInst::ArgOffset
- ? cast</*FIXME: CallInst*/User>(II)->op_begin()
- : cast</*FIXME: CallInst*/User>(II)->op_end() - 1)
- : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function
+ ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
+ : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
}
//===----------------------------------------------------------------------===//
@@ -233,7 +231,7 @@ CallInst::~CallInst() {
void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
assert(NumOperands == NumParams+1 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
+ Op<-1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -246,15 +244,15 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Params[i]->getType()) &&
"Calling a function with a bad signature!");
- OperandList[i + ArgOffset] = Params[i];
+ OperandList[i] = Params[i];
}
}
void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
assert(NumOperands == 3 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
- Op<ArgOffset + 0>() = Actual1;
- Op<ArgOffset + 1>() = Actual2;
+ Op<-1>() = Func;
+ Op<0>() = Actual1;
+ Op<1>() = Actual2;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -273,8 +271,8 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
void CallInst::init(Value *Func, Value *Actual) {
assert(NumOperands == 2 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
- Op<ArgOffset + 0>() = Actual;
+ Op<-1>() = Func;
+ Op<0>() = Actual;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -290,7 +288,7 @@ void CallInst::init(Value *Func, Value *Actual) {
void CallInst::init(Value *Func) {
assert(NumOperands == 1 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
+ Op<-1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -893,6 +891,8 @@ AllocaInst::~AllocaInst() {
void AllocaInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
setInstructionSubclassData(Log2_32(Align) + 1);
assert(getAlignment() == Align && "Alignment representation error!");
}
@@ -1028,8 +1028,11 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
void LoadInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
((Log2_32(Align)+1)<<1));
+ assert(getAlignment() == Align && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -1124,8 +1127,11 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
void StoreInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
((Log2_32(Align)+1) << 1));
+ assert(getAlignment() == Align && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -1424,9 +1430,24 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return false;
const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
- if (!isa<Constant>(Mask) || MaskTy == 0 ||
- !MaskTy->getElementType()->isIntegerTy(32))
+ if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
+ return false;
+
+ // Check to see if Mask is valid.
+ if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
+ const VectorType *VTy = cast<VectorType>(V1->getType());
+ for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
+ if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(VTy->getNumElements()*2))
+ return false;
+ } else if (!isa<UndefValue>(MV->getOperand(i))) {
+ return false;
+ }
+ }
+ }
+ else if (!isa<UndefValue>(Mask) && !isa<ConstantAggregateZero>(Mask))
return false;
+
return true;
}
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 4d61363b9394..563c651315a3 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -110,21 +110,18 @@ static bool isValidName(StringRef MDName) {
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
unsigned LLVMContext::getMDKindID(StringRef Name) const {
assert(isValidName(Name) && "Invalid MDNode name");
-
- unsigned &Entry = pImpl->CustomMDKindNames[Name];
-
+
// If this is new, assign it its ID.
- if (Entry == 0) Entry = pImpl->CustomMDKindNames.size();
- return Entry;
+ return
+ pImpl->CustomMDKindNames.GetOrCreateValue(
+ Name, pImpl->CustomMDKindNames.size()).second;
}
/// getHandlerNames - Populate client supplied smallvector using custome
/// metadata name and ID.
void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
- Names.resize(pImpl->CustomMDKindNames.size()+1);
- Names[0] = "";
+ Names.resize(pImpl->CustomMDKindNames.size());
for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
E = pImpl->CustomMDKindNames.end(); I != E; ++I)
- // MD Handlers are numbered from 1.
Names[I->second] = I->first();
}
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index 9e41a0815608..93a075f0fccb 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -57,14 +57,11 @@ LLVMContextImpl::~LLVMContextImpl() {
DropReferences());
std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
DropReferences());
- std::for_each(UnionConstants.map_begin(), UnionConstants.map_end(),
- DropReferences());
std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
DropReferences());
ExprConstants.freeConstants();
ArrayConstants.freeConstants();
StructConstants.freeConstants();
- UnionConstants.freeConstants();
VectorConstants.freeConstants();
AggZeroConstants.freeConstants();
NullPtrConstants.freeConstants();
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 4876f5d5075a..51b2992898c0 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -144,10 +144,6 @@ public:
ConstantStruct, true /*largekey*/> StructConstantsTy;
StructConstantsTy StructConstants;
- typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion>
- UnionConstantsTy;
- UnionConstantsTy UnionConstants;
-
typedef ConstantUniqueMap<std::vector<Constant*>, VectorType,
ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
@@ -192,7 +188,6 @@ public:
TypeMap<PointerValType, PointerType> PointerTypes;
TypeMap<FunctionValType, FunctionType> FunctionTypes;
TypeMap<StructValType, StructType> StructTypes;
- TypeMap<UnionValType, UnionType> UnionTypes;
TypeMap<IntegerValType, IntegerType> IntegerTypes;
// Opaque types are not structurally uniqued, so don't use TypeMap.
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 3100d4ac7c9c..da69c43ff735 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/SmallString.h"
#include "SymbolTableListTraitsImpl.h"
+#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;
@@ -186,6 +187,21 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
unsigned NumVals, FunctionLocalness FL,
bool Insert) {
LLVMContextImpl *pImpl = Context.pImpl;
+
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != NumVals; ++i)
+ ID.AddPointer(Vals[i]);
+
+ void *InsertPoint;
+ MDNode *N = NULL;
+
+ if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
+ return N;
+
bool isFunctionLocal = false;
switch (FL) {
case FL_Unknown:
@@ -206,20 +222,6 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
break;
}
- FoldingSetNodeID ID;
- for (unsigned i = 0; i != NumVals; ++i)
- ID.AddPointer(Vals[i]);
- ID.AddBoolean(isFunctionLocal);
-
- void *InsertPoint;
- MDNode *N = NULL;
-
- if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
- return N;
-
- if (!Insert)
- return NULL;
-
// Coallocate space for the node and Operands together, then placement new.
void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal);
@@ -244,15 +246,42 @@ MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals,
return getMDNode(Context, Vals, NumVals, FL_Unknown, false);
}
+MDNode *MDNode::getTemporary(LLVMContext &Context, Value *const *Vals,
+ unsigned NumVals) {
+ MDNode *N = (MDNode *)malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
+ N = new (N) MDNode(Context, Vals, NumVals, FL_No);
+ N->setValueSubclassData(N->getSubclassDataFromValue() |
+ NotUniquedBit);
+ LeakDetector::addGarbageObject(N);
+ return N;
+}
+
+void MDNode::deleteTemporary(MDNode *N) {
+ assert(N->use_empty() && "Temporary MDNode has uses!");
+ assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
+ "Deleting a non-temporary uniqued node!");
+ assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
+ "Deleting a non-temporary non-uniqued node!");
+ assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
+ "Temporary MDNode does not have NotUniquedBit set!");
+ assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
+ "Temporary MDNode has DestroyFlag set!");
+ LeakDetector::removeGarbageObject(N);
+ N->destroy();
+}
+
/// getOperand - Return specified operand.
Value *MDNode::getOperand(unsigned i) const {
return *getOperandPtr(const_cast<MDNode*>(this), i);
}
void MDNode::Profile(FoldingSetNodeID &ID) const {
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
ID.AddPointer(getOperand(i));
- ID.AddBoolean(isFunctionLocal());
}
void MDNode::setIsNotUniqued() {
@@ -301,7 +330,8 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// If we are dropping an argument to null, we choose to not unique the MDNode
// anymore. This commonly occurs during destruction, and uniquing these
- // brings little reuse.
+ // brings little reuse. Also, this means we don't need to include
+ // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
if (To == 0) {
setIsNotUniqued();
return;
@@ -324,59 +354,35 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// InsertPoint will have been set by the FindNodeOrInsertPos call.
pImpl->MDNodeSet.InsertNode(this, InsertPoint);
+
+ // If this MDValue was previously function-local but no longer is, clear
+ // its function-local flag.
+ if (isFunctionLocal() && !isFunctionLocalValue(To)) {
+ bool isStillFunctionLocal = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Value *V = getOperand(i);
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isStillFunctionLocal = true;
+ break;
+ }
+ }
+ if (!isStillFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit);
+ }
}
//===----------------------------------------------------------------------===//
// NamedMDNode implementation.
//
-namespace llvm {
-// SymbolTableListTraits specialization for MDSymbolTable.
-void ilist_traits<NamedMDNode>
-::addNodeToList(NamedMDNode *N) {
- assert(N->getParent() == 0 && "Value already in a container!!");
- Module *Owner = getListOwner();
- N->setParent(Owner);
- MDSymbolTable &ST = Owner->getMDSymbolTable();
- ST.insert(N->getName(), N);
-}
-
-void ilist_traits<NamedMDNode>::removeNodeFromList(NamedMDNode *N) {
- N->setParent(0);
- Module *Owner = getListOwner();
- MDSymbolTable &ST = Owner->getMDSymbolTable();
- ST.remove(N->getName());
-}
-}
-
-static SmallVector<WeakVH, 4> &getNMDOps(void *Operands) {
- return *(SmallVector<WeakVH, 4>*)Operands;
-}
-
-NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N,
- MDNode *const *MDs,
- unsigned NumMDs, Module *ParentModule)
- : Value(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) {
- setName(N);
- Operands = new SmallVector<WeakVH, 4>();
-
- SmallVector<WeakVH, 4> &Node = getNMDOps(Operands);
- for (unsigned i = 0; i != NumMDs; ++i)
- Node.push_back(WeakVH(MDs[i]));
-
- if (ParentModule)
- ParentModule->getNamedMDList().push_back(this);
+static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
+ return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
}
-NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) {
- assert(NMD && "Invalid source NamedMDNode!");
- SmallVector<MDNode *, 4> Elems;
- Elems.reserve(NMD->getNumOperands());
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- Elems.push_back(NMD->getOperand(i));
- return new NamedMDNode(NMD->getContext(), NMD->getName().data(),
- Elems.data(), Elems.size(), M);
+NamedMDNode::NamedMDNode(const Twine &N)
+ : Name(N.str()), Parent(0),
+ Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
}
NamedMDNode::~NamedMDNode() {
@@ -392,18 +398,20 @@ unsigned NamedMDNode::getNumOperands() const {
/// getOperand - Return specified operand.
MDNode *NamedMDNode::getOperand(unsigned i) const {
assert(i < getNumOperands() && "Invalid Operand number!");
- return dyn_cast_or_null<MDNode>(getNMDOps(Operands)[i]);
+ return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
}
/// addOperand - Add metadata Operand.
void NamedMDNode::addOperand(MDNode *M) {
- getNMDOps(Operands).push_back(WeakVH(M));
+ assert(!M->isFunctionLocal() &&
+ "NamedMDNode operands must not be function-local!");
+ getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
}
/// eraseFromParent - Drop all references and remove the node from parent
/// module.
void NamedMDNode::eraseFromParent() {
- getParent()->getNamedMDList().erase(this);
+ getParent()->eraseNamedMetadata(this);
}
/// dropAllReferences - Remove all uses and clear node vector.
@@ -411,22 +419,6 @@ void NamedMDNode::dropAllReferences() {
getNMDOps(Operands).clear();
}
-/// setName - Set the name of this named metadata.
-void NamedMDNode::setName(const Twine &NewName) {
- assert (!NewName.isTriviallyEmpty() && "Invalid named metadata name!");
-
- SmallString<256> NameData;
- StringRef NameRef = NewName.toStringRef(NameData);
-
- // Name isn't changing?
- if (getName() == NameRef)
- return;
-
- Name = NameRef.str();
- if (Parent)
- Parent->getMDSymbolTable().insert(NameRef, this);
-}
-
/// getName - Return a constant reference to this named metadata's name.
StringRef NamedMDNode::getName() const {
return StringRef(Name);
@@ -445,10 +437,6 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
-void Instruction::setDbgMetadata(MDNode *Node) {
- DbgLoc = DebugLoc::getFromDILocation(Node);
-}
-
/// setMetadata - Set the metadata of of the specified kind to the specified
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
@@ -567,13 +555,11 @@ getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
}
-/// removeAllMetadata - Remove all metadata from this instruction.
-void Instruction::removeAllMetadata() {
- assert(hasMetadata() && "Caller should check");
- DbgLoc = DebugLoc();
- if (hasMetadataHashEntry()) {
- getContext().pImpl->MetadataStore.erase(this);
- setHasMetadataHashEntry(false);
- }
+/// clearMetadataHashEntries - Clear all hashtable-based metadata from
+/// this instruction.
+void Instruction::clearMetadataHashEntries() {
+ assert(hasMetadataHashEntry() && "Caller should check");
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
}
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 38a51dfd5d38..d7ddf96cb070 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -58,10 +58,10 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
//
Module::Module(StringRef MID, LLVMContext& C)
- : Context(C), Materializer(NULL), ModuleID(MID), DataLayout("") {
+ : Context(C), Materializer(NULL), ModuleID(MID) {
ValSymTab = new ValueSymbolTable();
TypeSymTab = new TypeSymbolTable();
- NamedMDSymTab = new MDSymbolTable();
+ NamedMDSymTab = new StringMap<NamedMDNode *>();
}
Module::~Module() {
@@ -73,7 +73,7 @@ Module::~Module() {
NamedMDList.clear();
delete ValSymTab;
delete TypeSymTab;
- delete NamedMDSymTab;
+ delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
}
/// Target endian information...
@@ -316,19 +316,28 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const {
NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
SmallString<256> NameData;
StringRef NameRef = Name.toStringRef(NameData);
- return NamedMDSymTab->lookup(NameRef);
+ return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef);
}
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
/// with the specified name. This method returns a new NamedMDNode if a
/// NamedMDNode with the specified name is not found.
NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
- NamedMDNode *NMD = NamedMDSymTab->lookup(Name);
- if (!NMD)
- NMD = NamedMDNode::Create(getContext(), Name, NULL, 0, this);
+ NamedMDNode *&NMD =
+ (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name];
+ if (!NMD) {
+ NMD = new NamedMDNode(Name);
+ NMD->setParent(this);
+ NamedMDList.push_back(NMD);
+ }
return NMD;
}
+void Module::eraseNamedMetadata(NamedMDNode *NMD) {
+ static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
+ NamedMDList.erase(NMD);
+}
+
//===----------------------------------------------------------------------===//
// Methods for easy access to the types in the module.
//
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index efd98af0f443..a7d7f61dd762 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -14,35 +14,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/Threading.h"
-#include <algorithm>
-#include <map>
-#include <set>
using namespace llvm;
//===----------------------------------------------------------------------===//
// Pass Implementation
//
-Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) {
- assert(pid && "pid cannot be 0");
-}
-
-Pass::Pass(PassKind K, const void *pid)
- : Resolver(0), PassID((intptr_t)pid), Kind(K) {
- assert(pid && "pid cannot be 0");
-}
+Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { }
// Force out-of-line virtual method.
Pass::~Pass() {
@@ -61,8 +44,8 @@ PassManagerType ModulePass::getPotentialPassManagerType() const {
return PMT_ModulePassManager;
}
-bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {
- return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0;
+bool Pass::mustPreserveAnalysisID(char &AID) const {
+ return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
}
// dumpPassStructure - Implement the -debug-passes=Structure option
@@ -75,7 +58,9 @@ void Pass::dumpPassStructure(unsigned Offset) {
/// Registration templates, but can be overloaded directly.
///
const char *Pass::getPassName() const {
- if (const PassInfo *PI = getPassInfo())
+ AnalysisID AID = getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
+ if (PI)
return PI->getPassName();
return "Unnamed pass: implement Pass::getPassName()";
}
@@ -101,7 +86,7 @@ void Pass::verifyAnalysis() const {
// By default, don't do anything.
}
-void *Pass::getAdjustedAnalysisPointer(const PassInfo *) {
+void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
return this;
}
@@ -150,30 +135,6 @@ Pass *FunctionPass::createPrinterPass(raw_ostream &O,
return createPrintFunctionPass(Banner, &O);
}
-// run - On a module, we run this pass by initializing, runOnFunction'ing once
-// for every function in the module, then by finalizing.
-//
-bool FunctionPass::runOnModule(Module &M) {
- bool Changed = doInitialization(M);
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration()) // Passes are not run on external functions!
- Changed |= runOnFunction(*I);
-
- return Changed | doFinalization(M);
-}
-
-// run - On a function, we simply initialize, run the function, then finalize.
-//
-bool FunctionPass::run(Function &F) {
- // Passes are not run on external functions!
- if (F.isDeclaration()) return false;
-
- bool Changed = doInitialization(*F.getParent());
- Changed |= runOnFunction(F);
- return Changed | doFinalization(*F.getParent());
-}
-
bool FunctionPass::doInitialization(Module &) {
// By default, don't do anything.
return false;
@@ -199,16 +160,6 @@ Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
return 0;
}
-// To run this pass on a function, we simply call runOnBasicBlock once for each
-// function.
-//
-bool BasicBlockPass::runOnFunction(Function &F) {
- bool Changed = doInitialization(F);
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- Changed |= runOnBasicBlock(*I);
- return Changed | doFinalization(F);
-}
-
bool BasicBlockPass::doInitialization(Module &) {
// By default, don't do anything.
return false;
@@ -233,161 +184,12 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
return PMT_BasicBlockPassManager;
}
-//===----------------------------------------------------------------------===//
-// Pass Registration mechanism
-//
-namespace {
-class PassRegistrar {
- /// Guards the contents of this class.
- mutable sys::SmartMutex<true> Lock;
-
- /// PassInfoMap - Keep track of the passinfo object for each registered llvm
- /// pass.
- typedef std::map<intptr_t, const PassInfo*> MapType;
- MapType PassInfoMap;
-
- typedef StringMap<const PassInfo*> StringMapType;
- StringMapType PassInfoStringMap;
-
- /// AnalysisGroupInfo - Keep track of information for each analysis group.
- struct AnalysisGroupInfo {
- std::set<const PassInfo *> Implementations;
- };
-
- /// AnalysisGroupInfoMap - Information for each analysis group.
- std::map<const PassInfo *, AnalysisGroupInfo> AnalysisGroupInfoMap;
-
-public:
-
- const PassInfo *GetPassInfo(intptr_t TI) const {
- sys::SmartScopedLock<true> Guard(Lock);
- MapType::const_iterator I = PassInfoMap.find(TI);
- return I != PassInfoMap.end() ? I->second : 0;
- }
-
- const PassInfo *GetPassInfo(StringRef Arg) const {
- sys::SmartScopedLock<true> Guard(Lock);
- StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
- return I != PassInfoStringMap.end() ? I->second : 0;
- }
-
- void RegisterPass(const PassInfo &PI) {
- sys::SmartScopedLock<true> Guard(Lock);
- bool Inserted =
- PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
- assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
- PassInfoStringMap[PI.getPassArgument()] = &PI;
- }
-
- void UnregisterPass(const PassInfo &PI) {
- sys::SmartScopedLock<true> Guard(Lock);
- MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
- assert(I != PassInfoMap.end() && "Pass registered but not in map!");
-
- // Remove pass from the map.
- PassInfoMap.erase(I);
- PassInfoStringMap.erase(PI.getPassArgument());
- }
-
- void EnumerateWith(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(Lock);
- for (MapType::const_iterator I = PassInfoMap.begin(),
- E = PassInfoMap.end(); I != E; ++I)
- L->passEnumerate(I->second);
- }
-
-
- /// Analysis Group Mechanisms.
- void RegisterAnalysisGroup(PassInfo *InterfaceInfo,
- const PassInfo *ImplementationInfo,
- bool isDefault) {
- sys::SmartScopedLock<true> Guard(Lock);
- AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
- assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
- "Cannot add a pass to the same analysis group more than once!");
- AGI.Implementations.insert(ImplementationInfo);
- if (isDefault) {
- assert(InterfaceInfo->getNormalCtor() == 0 &&
- "Default implementation for analysis group already specified!");
- assert(ImplementationInfo->getNormalCtor() &&
- "Cannot specify pass as default if it does not have a default ctor");
- InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
- }
- }
-};
-}
-
-static std::vector<PassRegistrationListener*> *Listeners = 0;
-static sys::SmartMutex<true> ListenersLock;
-
-static PassRegistrar *PassRegistrarObj = 0;
-static PassRegistrar *getPassRegistrar() {
- // Use double-checked locking to safely initialize the registrar when
- // we're running in multithreaded mode.
- PassRegistrar* tmp = PassRegistrarObj;
- if (llvm_is_multithreaded()) {
- sys::MemoryFence();
- if (!tmp) {
- llvm_acquire_global_lock();
- tmp = PassRegistrarObj;
- if (!tmp) {
- tmp = new PassRegistrar();
- sys::MemoryFence();
- PassRegistrarObj = tmp;
- }
- llvm_release_global_lock();
- }
- } else if (!tmp) {
- PassRegistrarObj = new PassRegistrar();
- }
-
- return PassRegistrarObj;
-}
-
-namespace {
-
-// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
-// Unfortunately, passes are registered with static ctors, and having
-// llvm_shutdown clear this map prevents successful ressurection after
-// llvm_shutdown is run. Ideally we should find a solution so that we don't
-// leak the map, AND can still resurrect after shutdown.
-void cleanupPassRegistrar(void*) {
- if (PassRegistrarObj) {
- delete PassRegistrarObj;
- PassRegistrarObj = 0;
- }
-}
-ManagedCleanup<&cleanupPassRegistrar> registrarCleanup ATTRIBUTE_USED;
-
-}
-
-// getPassInfo - Return the PassInfo data structure that corresponds to this
-// pass...
-const PassInfo *Pass::getPassInfo() const {
- return lookupPassInfo(PassID);
-}
-
-const PassInfo *Pass::lookupPassInfo(intptr_t TI) {
- return getPassRegistrar()->GetPassInfo(TI);
+const PassInfo *Pass::lookupPassInfo(const void *TI) {
+ return PassRegistry::getPassRegistry()->getPassInfo(TI);
}
const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
- return getPassRegistrar()->GetPassInfo(Arg);
-}
-
-void PassInfo::registerPass() {
- getPassRegistrar()->RegisterPass(*this);
-
- // Notify any listeners.
- sys::SmartScopedLock<true> Lock(ListenersLock);
- if (Listeners)
- for (std::vector<PassRegistrationListener*>::iterator
- I = Listeners->begin(), E = Listeners->end(); I != E; ++I)
- (*I)->passRegistered(this);
-}
-
-void PassInfo::unregisterPass() {
- getPassRegistrar()->UnregisterPass(*this);
+ return PassRegistry::getPassRegistry()->getPassInfo(Arg);
}
Pass *PassInfo::createPass() const {
@@ -404,32 +206,11 @@ Pass *PassInfo::createPass() const {
// RegisterAGBase implementation
//
-RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
- intptr_t PassID, bool isDefault)
- : PassInfo(Name, InterfaceID) {
-
- PassInfo *InterfaceInfo =
- const_cast<PassInfo*>(Pass::lookupPassInfo(InterfaceID));
- if (InterfaceInfo == 0) {
- // First reference to Interface, register it now.
- registerPass();
- InterfaceInfo = this;
- }
- assert(isAnalysisGroup() &&
- "Trying to join an analysis group that is a normal pass!");
-
- if (PassID) {
- const PassInfo *ImplementationInfo = Pass::lookupPassInfo(PassID);
- assert(ImplementationInfo &&
- "Must register pass before adding to AnalysisGroup!");
-
- // Make sure we keep track of the fact that the implementation implements
- // the interface.
- PassInfo *IIPI = const_cast<PassInfo*>(ImplementationInfo);
- IIPI->addInterfaceImplemented(InterfaceInfo);
-
- getPassRegistrar()->RegisterAnalysisGroup(InterfaceInfo, IIPI, isDefault);
- }
+RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
+ const void *PassID, bool isDefault)
+ : PassInfo(Name, InterfaceID) {
+ PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
+ *this, isDefault);
}
@@ -440,31 +221,19 @@ RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
// PassRegistrationListener ctor - Add the current object to the list of
// PassRegistrationListeners...
PassRegistrationListener::PassRegistrationListener() {
- sys::SmartScopedLock<true> Lock(ListenersLock);
- if (!Listeners) Listeners = new std::vector<PassRegistrationListener*>();
- Listeners->push_back(this);
+ PassRegistry::getPassRegistry()->addRegistrationListener(this);
}
// dtor - Remove object from list of listeners...
PassRegistrationListener::~PassRegistrationListener() {
- sys::SmartScopedLock<true> Lock(ListenersLock);
- std::vector<PassRegistrationListener*>::iterator I =
- std::find(Listeners->begin(), Listeners->end(), this);
- assert(Listeners && I != Listeners->end() &&
- "PassRegistrationListener not registered!");
- Listeners->erase(I);
-
- if (Listeners->empty()) {
- delete Listeners;
- Listeners = 0;
- }
+ PassRegistry::getPassRegistry()->removeRegistrationListener(this);
}
// enumeratePasses - Iterate over the registered passes, calling the
// passEnumerate callback on each PassInfo object.
//
void PassRegistrationListener::enumeratePasses() {
- getPassRegistrar()->EnumerateWith(this);
+ PassRegistry::getPassRegistry()->enumerateWith(this);
}
PassNameParser::~PassNameParser() {}
@@ -481,7 +250,7 @@ namespace {
void passEnumerate(const PassInfo *P) {
if (P->isCFGOnlyPass())
- CFGOnlyList.push_back(P);
+ CFGOnlyList.push_back(P->getTypeInfo());
}
};
}
@@ -501,15 +270,25 @@ void AnalysisUsage::setPreservesCFG() {
GetCFGOnlyPasses(Preserved).enumeratePasses();
}
-AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) {
- assert(ID && "Pass class not registered!");
- Required.push_back(ID);
+AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
+ const PassInfo *PI = Pass::lookupPassInfo(Arg);
+ // If the pass exists, preserve it. Otherwise silently do nothing.
+ if (PI) Preserved.push_back(PI->getTypeInfo());
return *this;
}
-AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) {
- assert(ID && "Pass class not registered!");
+AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
Required.push_back(ID);
- RequiredTransitive.push_back(ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
+ Required.push_back(&ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
+ Required.push_back(&ID);
+ RequiredTransitive.push_back(&ID);
return *this;
}
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 296b0d13a710..ab4d4e55c750 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the LLVM Pass Manager infrastructure.
+// This file implements the LLVM Pass Manager infrastructure.
//
//===----------------------------------------------------------------------===//
#include "llvm/PassManagers.h"
+#include "llvm/PassManager.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
@@ -24,8 +25,6 @@
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Mutex.h"
-#include "llvm/System/Threading.h"
-#include "llvm-c/Core.h"
#include <algorithm>
#include <cstdio>
#include <map>
@@ -82,30 +81,32 @@ PrintAfterAll("print-after-all",
/// This is a helper to determine whether to print IR before or
/// after a pass.
-static bool ShouldPrintBeforeOrAfterPass(Pass *P,
+static bool ShouldPrintBeforeOrAfterPass(const void *PassID,
PassOptionList &PassesToPrint) {
- for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
- const llvm::PassInfo *PassInf = PassesToPrint[i];
- if (PassInf && P->getPassInfo())
- if (PassInf->getPassArgument() ==
- P->getPassInfo()->getPassArgument()) {
- return true;
- }
+ if (const llvm::PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(PassID)) {
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
+ }
}
return false;
}
-
+
/// This is a utility to check whether a pass should have IR dumped
/// before it.
-static bool ShouldPrintBeforePass(Pass *P) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(P, PrintBefore);
+static bool ShouldPrintBeforePass(const void *PassID) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
}
/// This is a utility to check whether a pass should have IR dumped
/// after it.
-static bool ShouldPrintAfterPass(Pass *P) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(P, PrintAfter);
+static bool ShouldPrintAfterPass(const void *PassID) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
}
} // End of llvm namespace
@@ -124,9 +125,9 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
OS << "Releasing pass '";
else
OS << "Running pass '";
-
+
OS << P->getPassName() << "'";
-
+
if (M) {
OS << " on module '" << M->getModuleIdentifier() << "'.\n";
return;
@@ -162,8 +163,8 @@ class BBPassManager : public PMDataManager, public FunctionPass {
public:
static char ID;
- explicit BBPassManager(int Depth)
- : PMDataManager(Depth), FunctionPass(&ID) {}
+ explicit BBPassManager(int Depth)
+ : PMDataManager(Depth), FunctionPass(ID) {}
/// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
@@ -202,8 +203,8 @@ public:
return BP;
}
- virtual PassManagerType getPassManagerType() const {
- return PMT_BasicBlockPassManager;
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
}
};
@@ -223,9 +224,9 @@ private:
bool wasRun;
public:
static char ID;
- explicit FunctionPassManagerImpl(int Depth) :
- Pass(PT_PassManager, &ID), PMDataManager(Depth),
- PMTopLevelManager(TLM_Function), wasRun(false) { }
+ explicit FunctionPassManagerImpl(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new FPPassManager(1)), wasRun(false) {}
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -234,8 +235,8 @@ public:
void add(Pass *P) {
schedulePass(P);
}
-
- /// createPrinterPass - Get a function printer pass.
+
+ /// createPrinterPass - Get a function printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
return createPrintFunctionPass(Banner, &O);
}
@@ -251,12 +252,12 @@ public:
/// doInitialization - Run all of the initializers for the function passes.
///
bool doInitialization(Module &M);
-
+
/// doFinalization - Run all of the finalizers for the function passes.
///
bool doFinalization(Module &M);
-
+
virtual PMDataManager *getAsPMDataManager() { return this; }
virtual Pass *getAsPass() { return this; }
@@ -265,7 +266,7 @@ public:
Info.setPreservesAll();
}
- inline void addTopLevelPass(Pass *P) {
+ void addTopLevelPass(Pass *P) {
if (ImmutablePass *IP = P->getAsImmutablePass()) {
// P is a immutable pass and it will be managed by this
// top level manager. Set up analysis resolver to connect them.
@@ -288,6 +289,7 @@ public:
};
char FunctionPassManagerImpl::ID = 0;
+
//===----------------------------------------------------------------------===//
// MPPassManager
//
@@ -298,11 +300,11 @@ class MPPassManager : public Pass, public PMDataManager {
public:
static char ID;
explicit MPPassManager(int Depth) :
- Pass(PT_PassManager, &ID), PMDataManager(Depth) { }
+ Pass(PT_PassManager, ID), PMDataManager(Depth) { }
// Delete on the fly managers.
virtual ~MPPassManager() {
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
I != E; ++I) {
FunctionPassManagerImpl *FPP = I->second;
@@ -310,7 +312,7 @@ public:
}
}
- /// createPrinterPass - Get a module printer pass.
+ /// createPrinterPass - Get a module printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
return createPrintModulePass(&O, false, Banner);
}
@@ -329,10 +331,10 @@ public:
/// through getAnalysis interface.
virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
- /// Return function pass corresponding to PassInfo PI, that is
+ /// Return function pass corresponding to PassInfo PI, that is
/// required by module pass MP. Instantiate analysis pass, by using
/// its runOnFunction() for function F.
- virtual Pass* getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F);
+ virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
virtual const char *getPassName() const {
return "Module Pass Manager";
@@ -360,8 +362,8 @@ public:
return static_cast<ModulePass *>(PassVector[N]);
}
- virtual PassManagerType getPassManagerType() const {
- return PMT_ModulePassManager;
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
}
private:
@@ -383,8 +385,8 @@ class PassManagerImpl : public Pass,
public:
static char ID;
explicit PassManagerImpl(int Depth) :
- Pass(PT_PassManager, &ID), PMDataManager(Depth),
- PMTopLevelManager(TLM_Pass) { }
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new MPPassManager(1)) {}
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -393,8 +395,8 @@ public:
void add(Pass *P) {
schedulePass(P);
}
-
- /// createPrinterPass - Get a module printer pass.
+
+ /// createPrinterPass - Get a module printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
return createPrintModulePass(&O, false, Banner);
}
@@ -408,7 +410,7 @@ public:
Info.setPreservesAll();
}
- inline void addTopLevelPass(Pass *P) {
+ void addTopLevelPass(Pass *P) {
if (ImmutablePass *IP = P->getAsImmutablePass()) {
// P is a immutable pass and it will be managed by this
// top level manager. Set up analysis resolver to connect them.
@@ -451,7 +453,7 @@ class TimingInfo {
public:
// Use 'create' member to get this.
TimingInfo() : TG("... Pass execution timing report ...") {}
-
+
// TimingDtor - Print out information about timing information
~TimingInfo() {
// Delete all of the timers, which accumulate their info into the
@@ -469,7 +471,7 @@ public:
/// getPassTimer - Return the timer for the specified pass if it exists.
Timer *getPassTimer(Pass *P) {
- if (P->getAsPMDataManager())
+ if (P->getAsPMDataManager())
return 0;
sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
@@ -488,28 +490,20 @@ static TimingInfo *TheTimeInfo;
// PMTopLevelManager implementation
/// Initialize top level manager. Create first pass manager.
-PMTopLevelManager::PMTopLevelManager(enum TopLevelManagerType t) {
- if (t == TLM_Pass) {
- MPPassManager *MPP = new MPPassManager(1);
- MPP->setTopLevelManager(this);
- addPassManager(MPP);
- activeStack.push(MPP);
- } else if (t == TLM_Function) {
- FPPassManager *FPP = new FPPassManager(1);
- FPP->setTopLevelManager(this);
- addPassManager(FPP);
- activeStack.push(FPP);
- }
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+ PMDM->setTopLevelManager(this);
+ addPassManager(PMDM);
+ activeStack.push(PMDM);
}
/// Set pass P as the last user of the given analysis passes.
-void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
+void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
Pass *P) {
for (SmallVector<Pass *, 12>::iterator I = AnalysisPasses.begin(),
E = AnalysisPasses.end(); I != E; ++I) {
Pass *AP = *I;
LastUser[AP] = P;
-
+
if (P == AP)
continue;
@@ -528,7 +522,7 @@ void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
/// Collect passes whose last user is P
void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
Pass *P) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
InversedLastUser.find(P);
if (DMI == InversedLastUser.end())
return;
@@ -544,7 +538,7 @@ void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
AnalysisUsage *AnUsage = NULL;
DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
- if (DMI != AnUsageMap.end())
+ if (DMI != AnUsageMap.end())
AnUsage = DMI->second;
else {
AnUsage = new AnalysisUsage();
@@ -568,8 +562,9 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// If P is an analysis pass and it is available then do not
// generate the analysis again. Stale analysis info should not be
// available at this point.
- if (P->getPassInfo() &&
- P->getPassInfo()->isAnalysis() && findAnalysisPass(P->getPassInfo())) {
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+ if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
delete P;
return;
}
@@ -579,14 +574,15 @@ void PMTopLevelManager::schedulePass(Pass *P) {
bool checkAnalysis = true;
while (checkAnalysis) {
checkAnalysis = false;
-
+
const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
E = RequiredSet.end(); I != E; ++I) {
-
+
Pass *AnalysisPass = findAnalysisPass(*I);
if (!AnalysisPass) {
- AnalysisPass = (*I)->createPass();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ AnalysisPass = PI->createPass();
if (P->getPotentialPassManagerType () ==
AnalysisPass->getPotentialPassManagerType())
// Schedule analysis pass that is managed by the same pass manager.
@@ -595,12 +591,12 @@ void PMTopLevelManager::schedulePass(Pass *P) {
AnalysisPass->getPotentialPassManagerType()) {
// Schedule analysis pass that is managed by a new manager.
schedulePass(AnalysisPass);
- // Recheck analysis passes to ensure that required analysises that
+ // Recheck analysis passes to ensure that required analyses that
// are already checked are still available.
checkAnalysis = true;
}
else
- // Do not schedule this analysis. Lower level analsyis
+ // Do not schedule this analysis. Lower level analsyis
// passes are run on the fly.
delete AnalysisPass;
}
@@ -632,16 +628,21 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
for (SmallVector<ImmutablePass *, 8>::iterator I = ImmutablePasses.begin(),
E = ImmutablePasses.end(); P == NULL && I != E; ++I) {
- const PassInfo *PI = (*I)->getPassInfo();
+ AnalysisID PI = (*I)->getPassID();
if (PI == AID)
P = *I;
// If Pass not found then check the interfaces implemented by Immutable Pass
if (!P) {
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
const std::vector<const PassInfo*> &ImmPI =
- PI->getInterfacesImplemented();
- if (std::find(ImmPI.begin(), ImmPI.end(), AID) != ImmPI.end())
- P = *I;
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ P = *I;
+ }
}
}
@@ -658,7 +659,7 @@ void PMTopLevelManager::dumpPasses() const {
for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
ImmutablePasses[i]->dumpPassStructure(0);
}
-
+
// Every class that derives from PMDataManager also derives from Pass
// (sometimes indirectly), but there's no inheritance relationship
// between PMDataManager and Pass, so we have to getAsPass to get
@@ -684,15 +685,16 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
(*I)->initializeAnalysisInfo();
-
+
// Initailize other pass managers
- for (SmallVector<PMDataManager *, 8>::iterator I = IndirectPassManagers.begin(),
- E = IndirectPassManagers.end(); I != E; ++I)
+ for (SmallVector<PMDataManager *, 8>::iterator
+ I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+ I != E; ++I)
(*I)->initializeAnalysisInfo();
for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
DME = LastUser.end(); DMI != DME; ++DMI) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
InversedLastUser.find(DMI->second);
if (InvDMI != InversedLastUser.end()) {
SmallPtrSet<Pass *, 8> &L = InvDMI->second;
@@ -709,7 +711,7 @@ PMTopLevelManager::~PMTopLevelManager() {
for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
delete *I;
-
+
for (SmallVector<ImmutablePass *, 8>::iterator
I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
delete *I;
@@ -724,16 +726,19 @@ PMTopLevelManager::~PMTopLevelManager() {
/// Augement AvailableAnalysis by adding analysis made available by pass P.
void PMDataManager::recordAvailableAnalysis(Pass *P) {
- const PassInfo *PI = P->getPassInfo();
- if (PI == 0) return;
-
+ AnalysisID PI = P->getPassID();
+
AvailableAnalysis[PI] = P;
- //This pass is the current implementation of all of the interfaces it
- //implements as well.
- const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+ assert(!AvailableAnalysis.empty());
+
+ // This pass is the current implementation of all of the interfaces it
+ // implements as well.
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+ if (PInf == 0) return;
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
for (unsigned i = 0, e = II.size(); i != e; ++i)
- AvailableAnalysis[II[i]] = P;
+ AvailableAnalysis[II[i]->getTypeInfo()] = P;
}
// Return true if P preserves high level analysis used by other
@@ -742,18 +747,18 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
if (AnUsage->getPreservesAll())
return true;
-
+
const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
for (SmallVector<Pass *, 8>::iterator I = HigherLevelAnalysis.begin(),
E = HigherLevelAnalysis.end(); I != E; ++I) {
Pass *P1 = *I;
if (P1->getAsImmutablePass() == 0 &&
std::find(PreservedSet.begin(), PreservedSet.end(),
- P1->getPassInfo()) ==
+ P1->getPassID()) ==
PreservedSet.end())
return false;
}
-
+
return true;
}
@@ -788,7 +793,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
E = AvailableAnalysis.end(); I != E; ) {
std::map<AnalysisID, Pass*>::iterator Info = I++;
if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
PreservedSet.end()) {
// Remove this analysis
if (PassDebugging >= Details) {
@@ -807,12 +812,12 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
if (!InheritedAnalysis[Index])
continue;
- for (std::map<AnalysisID, Pass*>::iterator
+ for (std::map<AnalysisID, Pass*>::iterator
I = InheritedAnalysis[Index]->begin(),
E = InheritedAnalysis[Index]->end(); I != E; ) {
std::map<AnalysisID, Pass *>::iterator Info = I++;
if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
PreservedSet.end()) {
// Remove this analysis
if (PassDebugging >= Details) {
@@ -861,23 +866,24 @@ void PMDataManager::freePass(Pass *P, StringRef Msg,
P->releaseMemory();
}
- if (const PassInfo *PI = P->getPassInfo()) {
+ AnalysisID PI = P->getPassID();
+ if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
// Remove the pass itself (if it is not already removed).
AvailableAnalysis.erase(PI);
// Remove all interfaces this pass implements, for which it is also
// listed as the available implementation.
- const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
for (unsigned i = 0, e = II.size(); i != e; ++i) {
std::map<AnalysisID, Pass*>::iterator Pos =
- AvailableAnalysis.find(II[i]);
+ AvailableAnalysis.find(II[i]->getTypeInfo());
if (Pos != AvailableAnalysis.end() && Pos->second == P)
AvailableAnalysis.erase(Pos);
}
}
}
-/// Add pass P into the PassVector. Update
+/// Add pass P into the PassVector. Update
/// AvailableAnalysis appropriately if ProcessAnalysis is true.
void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
// This manager is going to manage pass P. Set up analysis resolver
@@ -902,7 +908,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
unsigned PDepth = this->getDepth();
- collectRequiredAnalysis(RequiredPasses,
+ collectRequiredAnalysis(RequiredPasses,
ReqAnalysisNotAvailable, P);
for (SmallVector<Pass *, 8>::iterator I = RequiredPasses.begin(),
E = RequiredPasses.end(); I != E; ++I) {
@@ -920,7 +926,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
TransferLastUses.push_back(PRequired);
// Keep track of higher level analysis used by this manager.
HigherLevelAnalysis.push_back(PRequired);
- } else
+ } else
llvm_unreachable("Unable to accomodate Required Pass");
}
@@ -937,11 +943,12 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
TransferLastUses.clear();
}
- // Now, take care of required analysises that are not available.
- for (SmallVector<AnalysisID, 8>::iterator
- I = ReqAnalysisNotAvailable.begin(),
+ // Now, take care of required analyses that are not available.
+ for (SmallVector<AnalysisID, 8>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
- Pass *AnalysisPass = (*I)->createPass();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ Pass *AnalysisPass = PI->createPass();
this->addLowerLevelRequiredPass(P, AnalysisPass);
}
@@ -963,10 +970,10 @@ void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
Pass *P) {
AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator
+ for (AnalysisUsage::VectorType::const_iterator
I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
+ RP.push_back(AnalysisPass);
else
RP_NotAvail.push_back(*I);
}
@@ -975,7 +982,7 @@ void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
E = IDs.end(); I != E; ++I) {
if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
+ RP.push_back(AnalysisPass);
else
RP_NotAvail.push_back(*I);
}
@@ -1016,7 +1023,7 @@ Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
// Search Parents through TopLevelManager
if (SearchParent)
return TPM->findAnalysisPass(AID);
-
+
return NULL;
}
@@ -1030,7 +1037,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
return;
TPM->collectLastUses(LUses, P);
-
+
for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
E = LUses.end(); I != E; ++I) {
llvm::dbgs() << "--" << std::string(Offset*2, ' ');
@@ -1044,7 +1051,8 @@ void PMDataManager::dumpPassArguments() const {
if (PMDataManager *PMD = (*I)->getAsPMDataManager())
PMD->dumpPassArguments();
else
- if (const PassInfo *PI = (*I)->getPassInfo())
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
if (!PI->isAnalysisGroup())
dbgs() << " -" << PI->getPassArgument();
}
@@ -1093,7 +1101,7 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
void PMDataManager::dumpRequiredSet(const Pass *P) const {
if (PassDebugging < Details)
return;
-
+
AnalysisUsage analysisUsage;
P->getAnalysisUsage(analysisUsage);
dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
@@ -1102,7 +1110,7 @@ void PMDataManager::dumpRequiredSet(const Pass *P) const {
void PMDataManager::dumpPreservedSet(const Pass *P) const {
if (PassDebugging < Details)
return;
-
+
AnalysisUsage analysisUsage;
P->getAnalysisUsage(analysisUsage);
dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
@@ -1116,7 +1124,8 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
for (unsigned i = 0; i != Set.size(); ++i) {
if (i) dbgs() << ',';
- dbgs() << ' ' << Set[i]->getPassName();
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+ dbgs() << ' ' << PInf->getPassName();
}
dbgs() << '\n';
}
@@ -1131,14 +1140,14 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
TPM->dumpPasses();
}
- // Module Level pass may required Function Level analysis info
- // (e.g. dominator info). Pass manager uses on the fly function pass manager
- // to provide this on demand. In that case, in Pass manager terminology,
+ // Module Level pass may required Function Level analysis info
+ // (e.g. dominator info). Pass manager uses on the fly function pass manager
+ // to provide this on demand. In that case, in Pass manager terminology,
// module level pass is requiring lower level analysis info managed by
// lower level pass manager.
// When Pass manager is not able to order required analysis info, Pass manager
- // checks whether any lower level manager will be able to provide this
+ // checks whether any lower level manager will be able to provide this
// analysis info on demand or not.
#ifndef NDEBUG
dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
@@ -1147,7 +1156,7 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
llvm_unreachable("Unable to schedule pass");
}
-Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
assert(0 && "Unable to find on the fly pass");
return NULL;
}
@@ -1166,7 +1175,7 @@ Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
return PM.findAnalysisPass(ID, dir);
}
-Pass *AnalysisResolver::findImplPass(Pass *P, const PassInfo *AnalysisPI,
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
Function &F) {
return PM.getOnTheFlyPass(P, AnalysisPI, F);
}
@@ -1174,8 +1183,8 @@ Pass *AnalysisResolver::findImplPass(Pass *P, const PassInfo *AnalysisPI,
//===----------------------------------------------------------------------===//
// BBPassManager implementation
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
/// the function, and if so, return true.
bool BBPassManager::runOnFunction(Function &F) {
if (F.isDeclaration())
@@ -1202,7 +1211,7 @@ bool BBPassManager::runOnFunction(Function &F) {
}
Changed |= LocalChanged;
- if (LocalChanged)
+ if (LocalChanged)
dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
I->getName());
dumpPreservedSet(BP);
@@ -1286,17 +1295,18 @@ void FunctionPassManager::addImpl(Pass *P) {
/// PassManager_X is destroyed, the pass will be destroyed as well, so
/// there is no need to delete the pass. (TODO delete passes.)
/// This implies that all passes MUST be allocated with 'new'.
-void FunctionPassManager::add(Pass *P) {
+void FunctionPassManager::add(Pass *P) {
// If this is a not a function pass, don't add a printer for it.
+ const void *PassID = P->getPassID();
if (P->getPassKind() == PT_Function)
- if (ShouldPrintBeforePass(P))
+ if (ShouldPrintBeforePass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ P->getPassName() + " ***"));
addImpl(P);
if (P->getPassKind() == PT_Function)
- if (ShouldPrintAfterPass(P))
+ if (ShouldPrintAfterPass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ P->getPassName() + " ***"));
}
@@ -1405,8 +1415,8 @@ void FPPassManager::dumpPassStructure(unsigned Offset) {
}
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnFunction method. Keep track of whether any of the passes modifies
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method. Keep track of whether any of the passes modifies
/// the function, and if so, return true.
bool FPPassManager::runOnFunction(Function &F) {
if (F.isDeclaration())
@@ -1476,8 +1486,8 @@ bool FPPassManager::doFinalization(Module &M) {
//===----------------------------------------------------------------------===//
// MPPassManager implementation
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnModule method. Keep track of whether any of the passes modifies
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method. Keep track of whether any of the passes modifies
/// the module, and if so, return true.
bool
MPPassManager::runOnModule(Module &M) {
@@ -1512,7 +1522,7 @@ MPPassManager::runOnModule(Module &M) {
dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
M.getModuleIdentifier());
dumpPreservedSet(MP);
-
+
verifyPreservedAnalysis(MP);
removeNotPreservedAnalysis(MP);
recordAvailableAnalysis(MP);
@@ -1538,7 +1548,7 @@ MPPassManager::runOnModule(Module &M) {
void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
"Unable to handle Pass that requires lower level Analysis pass");
- assert((P->getPotentialPassManagerType() <
+ assert((P->getPotentialPassManagerType() <
RequiredPass->getPotentialPassManagerType()) &&
"Unable to handle Pass that requires lower level Analysis pass");
@@ -1558,13 +1568,13 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
FPP->setLastUser(LU, P);
}
-/// Return function pass corresponding to PassInfo PI, that is
+/// Return function pass corresponding to PassInfo PI, that is
/// required by module pass MP. Instantiate analysis pass, by using
/// its runOnFunction() for function F.
-Pass* MPPassManager::getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F){
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
assert(FPP && "Unable to find on the fly pass");
-
+
FPP->releaseMemoryOnTheFly();
FPP->run(F);
return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
@@ -1614,13 +1624,14 @@ void PassManager::addImpl(Pass *P) {
/// will be destroyed as well, so there is no need to delete the pass. This
/// implies that all passes MUST be allocated with 'new'.
void PassManager::add(Pass *P) {
- if (ShouldPrintBeforePass(P))
+ const void* PassID = P->getPassID();
+ if (ShouldPrintBeforePass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ P->getPassName() + " ***"));
addImpl(P);
- if (ShouldPrintAfterPass(P))
+ if (ShouldPrintAfterPass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ P->getPassName() + " ***"));
}
@@ -1656,7 +1667,7 @@ void TimingInfo::createTheTimeInfo() {
/// If TimingInfo is enabled then start pass timer.
Timer *llvm::getPassTimer(Pass *P) {
- if (TheTimeInfo)
+ if (TheTimeInfo)
return TheTimeInfo->getPassTimer(P);
return 0;
}
@@ -1690,8 +1701,8 @@ void PMStack::push(PMDataManager *PM) {
}
// Dump content of the pass manager stack.
-void PMStack::dump() {
- for (std::deque<PMDataManager *>::iterator I = S.begin(),
+void PMStack::dump() const {
+ for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
E = S.end(); I != E; ++I)
printf("%s ", (*I)->getAsPass()->getPassName());
@@ -1700,11 +1711,11 @@ void PMStack::dump() {
}
/// Find appropriate Module Pass Manager in the PM Stack and
-/// add self into that manager.
-void ModulePass::assignPassManager(PMStack &PMS,
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
// Find Module Pass Manager
- while(!PMS.empty()) {
+ while (!PMS.empty()) {
PassManagerType TopPMType = PMS.top()->getPassManagerType();
if (TopPMType == PreferredType)
break; // We found desired pass manager
@@ -1718,7 +1729,7 @@ void ModulePass::assignPassManager(PMStack &PMS,
}
/// Find appropriate Function Pass Manager or Call Graph Pass Manager
-/// in the PM Stack and add self into that manager.
+/// in the PM Stack and add self into that manager.
void FunctionPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
@@ -1727,7 +1738,7 @@ void FunctionPass::assignPassManager(PMStack &PMS,
if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
PMS.pop();
else
- break;
+ break;
}
// Create new Function Pass Manager if needed.
@@ -1759,14 +1770,14 @@ void FunctionPass::assignPassManager(PMStack &PMS,
}
/// Find appropriate Basic Pass Manager or Call Graph Pass Manager
-/// in the PM Stack and add self into that manager.
+/// in the PM Stack and add self into that manager.
void BasicBlockPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
BBPassManager *BBP;
// Basic Pass Manager is a leaf pass manager. It does not handle
// any other pass manager.
- if (!PMS.empty() &&
+ if (!PMS.empty() &&
PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
BBP = (BBPassManager *)PMS.top();
} else {
@@ -1796,38 +1807,3 @@ void BasicBlockPass::assignPassManager(PMStack &PMS,
}
PassManagerBase::~PassManagerBase() {}
-
-/*===-- C Bindings --------------------------------------------------------===*/
-
-LLVMPassManagerRef LLVMCreatePassManager() {
- return wrap(new PassManager());
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
- return wrap(new FunctionPassManager(unwrap(M)));
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
- return LLVMCreateFunctionPassManagerForModule(
- reinterpret_cast<LLVMModuleRef>(P));
-}
-
-LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
- return unwrap<PassManager>(PM)->run(*unwrap(M));
-}
-
-LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doInitialization();
-}
-
-LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
- return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
-}
-
-LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doFinalization();
-}
-
-void LLVMDisposePassManager(LLVMPassManagerRef PM) {
- delete unwrap(PM);
-}
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
new file mode 100644
index 000000000000..21dba56aad72
--- /dev/null
+++ b/lib/VMCore/PassRegistry.cpp
@@ -0,0 +1,159 @@
+//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PassRegistry, with which passes are registered on
+// initialization, and supports the PassManager in dependency resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+static PassRegistry *PassRegistryObj = 0;
+PassRegistry *PassRegistry::getPassRegistry() {
+ // Use double-checked locking to safely initialize the registrar when
+ // we're running in multithreaded mode.
+ PassRegistry* tmp = PassRegistryObj;
+ if (llvm_is_multithreaded()) {
+ sys::MemoryFence();
+ if (!tmp) {
+ llvm_acquire_global_lock();
+ tmp = PassRegistryObj;
+ if (!tmp) {
+ tmp = new PassRegistry();
+ sys::MemoryFence();
+ PassRegistryObj = tmp;
+ }
+ llvm_release_global_lock();
+ }
+ } else if (!tmp) {
+ PassRegistryObj = new PassRegistry();
+ }
+
+ return PassRegistryObj;
+}
+
+namespace {
+
+// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
+// Unfortunately, passes are registered with static ctors, and having
+// llvm_shutdown clear this map prevents successful ressurection after
+// llvm_shutdown is run. Ideally we should find a solution so that we don't
+// leak the map, AND can still resurrect after shutdown.
+void cleanupPassRegistry(void*) {
+ if (PassRegistryObj) {
+ delete PassRegistryObj;
+ PassRegistryObj = 0;
+ }
+}
+ManagedCleanup<&cleanupPassRegistry> registryCleanup ATTRIBUTE_USED;
+
+}
+
+const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
+ sys::SmartScopedLock<true> Guard(Lock);
+ MapType::const_iterator I = PassInfoMap.find(TI);
+ return I != PassInfoMap.end() ? I->second : 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
+ sys::SmartScopedLock<true> Guard(Lock);
+ StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
+ return I != PassInfoStringMap.end() ? I->second : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+
+void PassRegistry::registerPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ bool Inserted =
+ PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+ assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
+ PassInfoStringMap[PI.getPassArgument()] = &PI;
+
+ // Notify any listeners.
+ for (std::vector<PassRegistrationListener*>::iterator
+ I = Listeners.begin(), E = Listeners.end(); I != E; ++I)
+ (*I)->passRegistered(&PI);
+}
+
+void PassRegistry::unregisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
+ assert(I != PassInfoMap.end() && "Pass registered but not in map!");
+
+ // Remove pass from the map.
+ PassInfoMap.erase(I);
+ PassInfoStringMap.erase(PI.getPassArgument());
+}
+
+void PassRegistry::enumerateWith(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ for (MapType::const_iterator I = PassInfoMap.begin(),
+ E = PassInfoMap.end(); I != E; ++I)
+ L->passEnumerate(I->second);
+}
+
+
+/// Analysis Group Mechanisms.
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+ const void *PassID,
+ PassInfo& Registeree,
+ bool isDefault) {
+ PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+ if (InterfaceInfo == 0) {
+ // First reference to Interface, register it now.
+ registerPass(Registeree);
+ InterfaceInfo = &Registeree;
+ }
+ assert(Registeree.isAnalysisGroup() &&
+ "Trying to join an analysis group that is a normal pass!");
+
+ if (PassID) {
+ PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+ assert(ImplementationInfo &&
+ "Must register pass before adding to AnalysisGroup!");
+
+ // Make sure we keep track of the fact that the implementation implements
+ // the interface.
+ ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
+
+ sys::SmartScopedLock<true> Guard(Lock);
+ AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
+ assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
+ "Cannot add a pass to the same analysis group more than once!");
+ AGI.Implementations.insert(ImplementationInfo);
+ if (isDefault) {
+ assert(InterfaceInfo->getNormalCtor() == 0 &&
+ "Default implementation for analysis group already specified!");
+ assert(ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
+ InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
+ }
+ }
+}
+
+void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ Listeners.push_back(L);
+}
+
+void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ std::vector<PassRegistrationListener*>::iterator I =
+ std::find(Listeners.begin(), Listeners.end(), L);
+ assert(I != Listeners.end() && "PassRegistrationListener not registered!");
+ Listeners.erase(I);
+}
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
index 2d69dce07f3f..2ee49d235963 100644
--- a/lib/VMCore/PrintModulePass.cpp
+++ b/lib/VMCore/PrintModulePass.cpp
@@ -28,10 +28,10 @@ namespace {
bool DeleteStream; // Delete the ostream in our dtor?
public:
static char ID;
- PrintModulePass() : ModulePass(&ID), Out(&dbgs()),
+ PrintModulePass() : ModulePass(ID), Out(&dbgs()),
DeleteStream(false) {}
PrintModulePass(const std::string &B, raw_ostream *o, bool DS)
- : ModulePass(&ID), Banner(B), Out(o), DeleteStream(DS) {}
+ : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {}
~PrintModulePass() {
if (DeleteStream) delete Out;
@@ -53,12 +53,12 @@ namespace {
bool DeleteStream; // Delete the ostream in our dtor?
public:
static char ID;
- PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&dbgs()),
+ PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()),
DeleteStream(false) {}
PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
- : FunctionPass(&ID), Banner(B), Out(o), DeleteStream(DS) {}
+ : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
- inline ~PrintFunctionPass() {
+ ~PrintFunctionPass() {
if (DeleteStream) delete Out;
}
@@ -77,11 +77,11 @@ namespace {
}
char PrintModulePass::ID = 0;
-static RegisterPass<PrintModulePass>
-X("print-module", "Print module to stderr");
+INITIALIZE_PASS(PrintModulePass, "print-module",
+ "Print module to stderr", false, false);
char PrintFunctionPass::ID = 0;
-static RegisterPass<PrintFunctionPass>
-Y("print-function","Print function to stderr");
+INITIALIZE_PASS(PrintFunctionPass, "print-function",
+ "Print function to stderr", false, false);
/// createPrintModulePass - Create and return a pass that writes the
/// module to the specified raw_ostream.
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 845b523c2421..c55e6267836a 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -50,7 +50,7 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) {
/// Because of the way Type subclasses are allocated, this function is necessary
/// to use the correct kind of "delete" operator to deallocate the Type object.
-/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space
+/// Some type objects (FunctionTy, StructTy) allocate additional space
/// after the space for their derived type to hold the contained types array of
/// PATypeHandles. Using this allocation scheme means all the PATypeHandles are
/// allocated with the type object, decreasing allocations and eliminating the
@@ -66,8 +66,7 @@ void Type::destroy() const {
// Structures and Functions allocate their contained types past the end of
// the type object itself. These need to be destroyed differently than the
// other types.
- if (this->isFunctionTy() || this->isStructTy() ||
- this->isUnionTy()) {
+ if (this->isFunctionTy() || this->isStructTy()) {
// First, make sure we destruct any PATypeHandles allocated by these
// subclasses. They must be manually destructed.
for (unsigned i = 0; i < NumContainedTys; ++i)
@@ -77,10 +76,10 @@ void Type::destroy() const {
// to delete this as an array of char.
if (this->isFunctionTy())
static_cast<const FunctionType*>(this)->FunctionType::~FunctionType();
- else if (this->isStructTy())
+ else {
+ assert(isStructTy());
static_cast<const StructType*>(this)->StructType::~StructType();
- else
- static_cast<const UnionType*>(this)->UnionType::~UnionType();
+ }
// Finally, remove the memory as an array deallocation of the chars it was
// constructed from.
@@ -234,7 +233,7 @@ bool Type::isSizedDerivedType() const {
if (const VectorType *PTy = dyn_cast<VectorType>(this))
return PTy->getElementType()->isSized();
- if (!this->isStructTy() && !this->isUnionTy())
+ if (!this->isStructTy())
return false;
// Okay, our struct is sized if all of the elements are...
@@ -319,31 +318,6 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const {
}
-bool UnionType::indexValid(const Value *V) const {
- // Union indexes require 32-bit integer constants.
- if (V->getType()->isIntegerTy(32))
- if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
- return indexValid(CU->getZExtValue());
- return false;
-}
-
-bool UnionType::indexValid(unsigned V) const {
- return V < NumContainedTys;
-}
-
-// getTypeAtIndex - Given an index value into the type, return the type of the
-// element. For a structure type, this must be a constant value...
-//
-const Type *UnionType::getTypeAtIndex(const Value *V) const {
- unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
- return getTypeAtIndex(Idx);
-}
-
-const Type *UnionType::getTypeAtIndex(unsigned Idx) const {
- assert(indexValid(Idx) && "Invalid structure index!");
- return ContainedTys[Idx];
-}
-
//===----------------------------------------------------------------------===//
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
@@ -455,8 +429,8 @@ const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
/// isValidReturnType - Return true if the specified type is valid as a return
/// type.
bool FunctionType::isValidReturnType(const Type *RetTy) {
- return RetTy->getTypeID() != LabelTyID &&
- RetTy->getTypeID() != MetadataTyID;
+ return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+ !RetTy->isMetadataTy();
}
/// isValidArgumentType - Return true if the specified type is valid as an
@@ -507,23 +481,6 @@ StructType::StructType(LLVMContext &C,
setAbstract(isAbstract);
}
-UnionType::UnionType(LLVMContext &C,const Type* const* Types, unsigned NumTypes)
- : CompositeType(C, UnionTyID) {
- ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
- NumContainedTys = NumTypes;
- bool isAbstract = false;
- for (unsigned i = 0; i < NumTypes; ++i) {
- assert(Types[i] && "<null> type for union field!");
- assert(isValidElementType(Types[i]) &&
- "Invalid type for union element!");
- new (&ContainedTys[i]) PATypeHandle(Types[i], this);
- isAbstract |= Types[i]->isAbstract();
- }
-
- // Calculate whether or not this type is abstract
- setAbstract(isAbstract);
-}
-
ArrayType::ArrayType(const Type *ElType, uint64_t NumEl)
: SequentialType(ArrayTyID, ElType) {
NumElements = NumEl;
@@ -603,8 +560,8 @@ namespace llvm {
static inline ChildIteratorType child_begin(NodeType *N) {
if (N->isAbstract())
return N->subtype_begin();
- else // No need to process children of concrete types.
- return N->subtype_end();
+ // No need to process children of concrete types.
+ return N->subtype_end();
}
static inline ChildIteratorType child_end(NodeType *N) {
return N->subtype_end();
@@ -627,35 +584,35 @@ void Type::PromoteAbstractToConcrete() {
// Concrete types are leaves in the tree. Since an SCC will either be all
// abstract or all concrete, we only need to check one type.
- if (SCC[0]->isAbstract()) {
- if (SCC[0]->isOpaqueTy())
- return; // Not going to be concrete, sorry.
-
- // If all of the children of all of the types in this SCC are concrete,
- // then this SCC is now concrete as well. If not, neither this SCC, nor
- // any parent SCCs will be concrete, so we might as well just exit.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- for (Type::subtype_iterator CI = SCC[i]->subtype_begin(),
- E = SCC[i]->subtype_end(); CI != E; ++CI)
- if ((*CI)->isAbstract())
- // If the child type is in our SCC, it doesn't make the entire SCC
- // abstract unless there is a non-SCC abstract type.
- if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end())
- return; // Not going to be concrete, sorry.
-
- // Okay, we just discovered this whole SCC is now concrete, mark it as
- // such!
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- assert(SCC[i]->isAbstract() && "Why are we processing concrete types?");
-
- SCC[i]->setAbstract(false);
- }
-
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- assert(!SCC[i]->isAbstract() && "Concrete type became abstract?");
- // The type just became concrete, notify all users!
- cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete();
- }
+ if (!SCC[0]->isAbstract()) continue;
+
+ if (SCC[0]->isOpaqueTy())
+ return; // Not going to be concrete, sorry.
+
+ // If all of the children of all of the types in this SCC are concrete,
+ // then this SCC is now concrete as well. If not, neither this SCC, nor
+ // any parent SCCs will be concrete, so we might as well just exit.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ for (Type::subtype_iterator CI = SCC[i]->subtype_begin(),
+ E = SCC[i]->subtype_end(); CI != E; ++CI)
+ if ((*CI)->isAbstract())
+ // If the child type is in our SCC, it doesn't make the entire SCC
+ // abstract unless there is a non-SCC abstract type.
+ if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end())
+ return; // Not going to be concrete, sorry.
+
+ // Okay, we just discovered this whole SCC is now concrete, mark it as
+ // such!
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ assert(SCC[i]->isAbstract() && "Why are we processing concrete types?");
+
+ SCC[i]->setAbstract(false);
+ }
+
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ assert(!SCC[i]->isAbstract() && "Concrete type became abstract?");
+ // The type just became concrete, notify all users!
+ cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete();
}
}
}
@@ -693,11 +650,15 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
const IntegerType *ITy2 = cast<IntegerType>(Ty2);
return ITy->getBitWidth() == ITy2->getBitWidth();
- } else if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ }
+
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
const PointerType *PTy2 = cast<PointerType>(Ty2);
return PTy->getAddressSpace() == PTy2->getAddressSpace() &&
TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
- } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructType *STy2 = cast<StructType>(Ty2);
if (STy->getNumElements() != STy2->getNumElements()) return false;
if (STy->isPacked() != STy2->isPacked()) return false;
@@ -705,22 +666,21 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes))
return false;
return true;
- } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
- const UnionType *UTy2 = cast<UnionType>(Ty2);
- if (UTy->getNumElements() != UTy2->getNumElements()) return false;
- for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i)
- if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes))
- return false;
- return true;
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ }
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
const ArrayType *ATy2 = cast<ArrayType>(Ty2);
return ATy->getNumElements() == ATy2->getNumElements() &&
TypesEqual(ATy->getElementType(), ATy2->getElementType(), EqTypes);
- } else if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) {
+ }
+
+ if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) {
const VectorType *PTy2 = cast<VectorType>(Ty2);
return PTy->getNumElements() == PTy2->getNumElements() &&
TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
- } else if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+ }
+
+ if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
const FunctionType *FTy2 = cast<FunctionType>(Ty2);
if (FTy->isVarArg() != FTy2->isVarArg() ||
FTy->getNumParams() != FTy2->getNumParams() ||
@@ -731,10 +691,10 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
return false;
}
return true;
- } else {
- llvm_unreachable("Unknown derived type!");
- return false;
}
+
+ llvm_unreachable("Unknown derived type!");
+ return false;
}
namespace llvm { // in namespace llvm so findable by ADL
@@ -808,13 +768,13 @@ const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
// Check for the built-in integer types
switch (NumBits) {
- case 1: return cast<IntegerType>(Type::getInt1Ty(C));
- case 8: return cast<IntegerType>(Type::getInt8Ty(C));
- case 16: return cast<IntegerType>(Type::getInt16Ty(C));
- case 32: return cast<IntegerType>(Type::getInt32Ty(C));
- case 64: return cast<IntegerType>(Type::getInt64Ty(C));
- default:
- break;
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ default:
+ break;
}
LLVMContextImpl *pImpl = C.pImpl;
@@ -902,8 +862,8 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
}
bool ArrayType::isValidElementType(const Type *ElemTy) {
- return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID &&
- ElemTy->getTypeID() != MetadataTyID && !ElemTy->isFunctionTy();
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
@@ -975,60 +935,6 @@ bool StructType::isValidElementType(const Type *ElemTy) {
//===----------------------------------------------------------------------===//
-// Union Type Factory...
-//
-
-UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) {
- assert(NumTypes > 0 && "union must have at least one member type!");
- UnionValType UTV(Types, NumTypes);
- UnionType *UT = 0;
-
- LLVMContextImpl *pImpl = Types[0]->getContext().pImpl;
-
- UT = pImpl->UnionTypes.get(UTV);
-
- if (!UT) {
- // Value not found. Derive a new type!
- UT = (UnionType*) operator new(sizeof(UnionType) +
- sizeof(PATypeHandle) * NumTypes);
- new (UT) UnionType(Types[0]->getContext(), Types, NumTypes);
- pImpl->UnionTypes.add(UTV, UT);
- }
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *UT << "\n");
-#endif
- return UT;
-}
-
-UnionType *UnionType::get(const Type *type, ...) {
- va_list ap;
- SmallVector<const llvm::Type*, 8> UnionFields;
- va_start(ap, type);
- while (type) {
- UnionFields.push_back(type);
- type = va_arg(ap, llvm::Type*);
- }
- unsigned NumTypes = UnionFields.size();
- assert(NumTypes > 0 && "union must have at least one member type!");
- return llvm::UnionType::get(&UnionFields[0], NumTypes);
-}
-
-bool UnionType::isValidElementType(const Type *ElemTy) {
- return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
-}
-
-int UnionType::getElementTypeIndex(const Type *ElemTy) const {
- int index = 0;
- for (UnionType::element_iterator I = element_begin(), E = element_end();
- I != E; ++I, ++index) {
- if (ElemTy == *I) return index;
- }
-
- return -1;
-}
-
-//===----------------------------------------------------------------------===//
// Pointer Type Factory...
//
@@ -1060,9 +966,8 @@ const PointerType *Type::getPointerTo(unsigned addrs) const {
}
bool PointerType::isValidElementType(const Type *ElemTy) {
- return ElemTy->getTypeID() != VoidTyID &&
- ElemTy->getTypeID() != LabelTyID &&
- ElemTy->getTypeID() != MetadataTyID;
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy();
}
@@ -1071,8 +976,7 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
//
OpaqueType *OpaqueType::get(LLVMContext &C) {
- OpaqueType *OT = new OpaqueType(C); // All opaque types are distinct
-
+ OpaqueType *OT = new OpaqueType(C); // All opaque types are distinct.
LLVMContextImpl *pImpl = C.pImpl;
pImpl->OpaqueTypes.insert(OT);
return OT;
@@ -1123,18 +1027,17 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
<< ">[" << (void*)this << "]" << "\n");
#endif
- this->destroy();
+ this->destroy();
}
-
}
-// unlockedRefineAbstractTypeTo - This function is used when it is discovered
+// refineAbstractTypeTo - This function is used when it is discovered
// that the 'this' abstract type is actually equivalent to the NewType
// specified. This causes all users of 'this' to switch to reference the more
// concrete type NewType and for 'this' to be deleted. Only used for internal
// callers.
//
-void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
assert(this != NewType && "Can't refine to myself!");
assert(ForwardType == 0 && "This type has already been refined!");
@@ -1199,15 +1102,6 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
// destroyed.
}
-// refineAbstractTypeTo - This function is used by external callers to notify
-// us that this abstract type is equivalent to another type.
-//
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
- // All recursive calls will go through unlockedRefineAbstractTypeTo,
- // to avoid deadlock problems.
- unlockedRefineAbstractTypeTo(NewType);
-}
-
// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
// the current type has transitioned from being abstract to being concrete.
//
@@ -1291,21 +1185,6 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
// concrete - this could potentially change us from an abstract type to a
// concrete type.
//
-void UnionType::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- LLVMContextImpl *pImpl = OldType->getContext().pImpl;
- pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType);
-}
-
-void UnionType::typeBecameConcrete(const DerivedType *AbsTy) {
- LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
- pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy);
-}
-
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
void PointerType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
LLVMContextImpl *pImpl = OldType->getContext().pImpl;
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index 02ab1135b32c..5a90917977b0 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -180,32 +180,6 @@ public:
}
};
-// UnionValType - Define a class to hold the key that goes into the TypeMap
-//
-class UnionValType {
- std::vector<const Type*> ElTypes;
-public:
- UnionValType(const Type* const* Types, unsigned NumTypes)
- : ElTypes(&Types[0], &Types[NumTypes]) {}
-
- static UnionValType get(const UnionType *UT) {
- std::vector<const Type *> ElTypes;
- ElTypes.reserve(UT->getNumElements());
- for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i)
- ElTypes.push_back(UT->getElementType(i));
-
- return UnionValType(&ElTypes[0], ElTypes.size());
- }
-
- static unsigned hashTypeStructure(const UnionType *UT) {
- return UT->getNumElements();
- }
-
- inline bool operator<(const UnionValType &UTV) const {
- return (ElTypes < UTV.ElTypes);
- }
-};
-
// FunctionValType - Define a class to hold the key that goes into the TypeMap
//
class FunctionValType {
@@ -370,7 +344,7 @@ public:
// We already have this type in the table. Get rid of the newly refined
// type.
TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
- Ty->unlockedRefineAbstractTypeTo(NewTy);
+ Ty->refineAbstractTypeTo(NewTy);
return;
}
} else {
@@ -385,31 +359,33 @@ public:
if (I->second == Ty) {
// Remember the position of the old type if we see it in our scan.
Entry = I;
+ continue;
+ }
+
+ if (!TypesEqual(Ty, I->second))
+ continue;
+
+ TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
+
+ // Remove the old entry form TypesByHash. If the hash values differ
+ // now, remove it from the old place. Otherwise, continue scanning
+ // withing this hashcode to reduce work.
+ if (NewTypeHash != OldTypeHash) {
+ RemoveFromTypesByHash(OldTypeHash, Ty);
} else {
- if (TypesEqual(Ty, I->second)) {
- TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-
- // Remove the old entry form TypesByHash. If the hash values differ
- // now, remove it from the old place. Otherwise, continue scanning
- // withing this hashcode to reduce work.
- if (NewTypeHash != OldTypeHash) {
- RemoveFromTypesByHash(OldTypeHash, Ty);
- } else {
- if (Entry == E) {
- // Find the location of Ty in the TypesByHash structure if we
- // haven't seen it already.
- while (I->second != Ty) {
- ++I;
- assert(I != E && "Structure doesn't contain type??");
- }
- Entry = I;
- }
- TypesByHash.erase(Entry);
+ if (Entry == E) {
+ // Find the location of Ty in the TypesByHash structure if we
+ // haven't seen it already.
+ while (I->second != Ty) {
+ ++I;
+ assert(I != E && "Structure doesn't contain type??");
}
- Ty->unlockedRefineAbstractTypeTo(NewTy);
- return;
+ Entry = I;
}
+ TypesByHash.erase(Entry);
}
+ Ty->refineAbstractTypeTo(NewTy);
+ return;
}
// If there is no existing type of the same structure, we reinsert an
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index b7fd92f9b066..fec710b39459 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -86,14 +86,27 @@ const Use *Use::getImpliedUser() const {
//===----------------------------------------------------------------------===//
Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
+ while (Done < 20) {
+ if (Start == Stop--)
+ return Start;
+ static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, stopTag,
+ zeroDigitTag, oneDigitTag, oneDigitTag,
+ stopTag, zeroDigitTag, oneDigitTag,
+ zeroDigitTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, oneDigitTag,
+ oneDigitTag, stopTag
+ };
+ Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(tags[Done++]));
+ Stop->Val = 0;
+ }
+
ptrdiff_t Count = Done;
while (Start != Stop) {
--Stop;
Stop->Val = 0;
if (!Count) {
- Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(Done == 0
- ? fullStopTag
- : stopTag));
+ Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(stopTag));
++Done;
Count = Done;
} else {
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 585edf09c9e5..b8c677565467 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -139,10 +139,6 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
} else if (Argument *A = dyn_cast<Argument>(V)) {
if (Function *P = A->getParent())
ST = &P->getValueSymbolTable();
- } else if (NamedMDNode *N = dyn_cast<NamedMDNode>(V)) {
- if (Module *P = N->getParent()) {
- ST = &P->getValueSymbolTable();
- }
} else if (isa<MDString>(V))
return true;
else {
@@ -492,10 +488,15 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
ValueHandleBase *Entry = pImpl->ValueHandles[V];
assert(Entry && "Value bit set but no entries exist");
- // We use a local ValueHandleBase as an iterator so that
- // ValueHandles can add and remove themselves from the list without
- // breaking our iteration. This is not really an AssertingVH; we
- // just have to give ValueHandleBase some kind.
+ // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+ // and remove themselves from the list without breaking our iteration. This
+ // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+ // Note that we deliberately do not the support the case when dropping a value
+ // handle results in a new value handle being permanently added to the list
+ // (as might occur in theory for CallbackVH's): the new value handle will not
+ // be processed and the checking code will mete out righteous punishment if
+ // the handle is still present once we have finished processing all the other
+ // value handles (it is fine to momentarily add then remove a value handle).
for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
Iterator.RemoveFromUseList();
Iterator.AddToExistingUseListAfter(Entry);
@@ -576,6 +577,24 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
break;
}
}
+
+#ifndef NDEBUG
+ // If any new tracking or weak value handles were added while processing the
+ // list, then complain about it now.
+ if (Old->HasValueHandle)
+ for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
+ switch (Entry->getKind()) {
+ case Tracking:
+ case Weak:
+ dbgs() << "After RAUW from " << *Old->getType() << " %"
+ << Old->getNameStr() << " to " << *New->getType() << " %"
+ << New->getNameStr() << "\n";
+ llvm_unreachable("A tracking or weak value handle still pointed to the"
+ " old value!\n");
+ default:
+ break;
+ }
+#endif
}
/// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 449d61a2cbb1..254bf06439d9 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -115,5 +115,3 @@ void ValueSymbolTable::dump() const {
//DEBUG(dbgs() << "\n");
}
}
-
-MDSymbolTable::~MDSymbolTable() { }
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index f97699dabd89..e3ecc979bf12 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -72,7 +72,7 @@ namespace { // Anonymous namespace for class
struct PreVerifier : public FunctionPass {
static char ID; // Pass ID, replacement for typeid
- PreVerifier() : FunctionPass(&ID) { }
+ PreVerifier() : FunctionPass(ID) { }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -102,9 +102,9 @@ namespace { // Anonymous namespace for class
}
char PreVerifier::ID = 0;
-static RegisterPass<PreVerifier>
-PreVer("preverify", "Preliminary module verification");
-static const PassInfo *const PreVerifyID = &PreVer;
+INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
+ false, false);
+char &PreVerifyID = PreVerifier::ID;
namespace {
class TypeSet : public AbstractTypeUser {
@@ -182,23 +182,13 @@ namespace {
SmallPtrSet<MDNode *, 32> MDNodes;
Verifier()
- : FunctionPass(&ID),
+ : FunctionPass(ID),
Broken(false), RealPass(true), action(AbortProcessAction),
Mod(0), Context(0), DT(0), MessagesStr(Messages) {}
explicit Verifier(VerifierFailureAction ctn)
- : FunctionPass(&ID),
+ : FunctionPass(ID),
Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
MessagesStr(Messages) {}
- explicit Verifier(bool AB)
- : FunctionPass(&ID),
- Broken(false), RealPass(true),
- action( AB ? AbortProcessAction : PrintMessageAction), Mod(0),
- Context(0), DT(0), MessagesStr(Messages) {}
- explicit Verifier(DominatorTree &dt)
- : FunctionPass(&ID),
- Broken(false), RealPass(false), action(PrintMessageAction), Mod(0),
- Context(0), DT(&dt), MessagesStr(Messages) {}
-
bool doInitialization(Module &M) {
Mod = &M;
@@ -331,6 +321,7 @@ namespace {
void visitBranchInst(BranchInst &BI);
void visitReturnInst(ReturnInst &RI);
void visitSwitchInst(SwitchInst &SI);
+ void visitIndirectBrInst(IndirectBrInst &BI);
void visitSelectInst(SelectInst &SI);
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
@@ -402,7 +393,7 @@ namespace {
} // End anonymous namespace
char Verifier::ID = 0;
-static RegisterPass<Verifier> X("verify", "Module Verifier");
+INITIALIZE_PASS(Verifier, "verify", "Module Verifier", false, false);
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, M) \
@@ -445,6 +436,10 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
"Only global arrays can have appending linkage!", GVar);
}
+
+ Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(),
+ "linker_private_weak_def_auto can only have default visibility!",
+ &GV);
}
void Verifier::visitGlobalVariable(GlobalVariable &GV) {
@@ -504,8 +499,8 @@ void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
if (!MD)
continue;
- Assert2(!MD->isFunctionLocal(),
- "Named metadata operand cannot be function local!", &NMD, MD);
+ Assert1(!MD->isFunctionLocal(),
+ "Named metadata operand cannot be function local!", MD);
visitMDNode(*MD, 0);
}
}
@@ -520,7 +515,7 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
Value *Op = MD.getOperand(i);
if (!Op)
continue;
- if (isa<Constant>(Op) || isa<MDString>(Op) || isa<NamedMDNode>(Op))
+ if (isa<Constant>(Op) || isa<MDString>(Op))
continue;
if (MDNode *N = dyn_cast<MDNode>(Op)) {
Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
@@ -864,6 +859,16 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
visitTerminatorInst(SI);
}
+void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
+ Assert1(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
+ for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
+ Assert1(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
+
+ visitTerminatorInst(BI);
+}
+
void Verifier::visitSelectInst(SelectInst &SI) {
Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
SI.getOperand(2)),
@@ -1202,6 +1207,7 @@ void Verifier::visitCallInst(CallInst &CI) {
void Verifier::visitInvokeInst(InvokeInst &II) {
VerifyCallSite(&II);
+ visitTerminatorInst(II);
}
/// visitBinaryOperator - Check that both arguments to the binary operator are
@@ -1266,28 +1272,37 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
visitInstruction(B);
}
-void Verifier::visitICmpInst(ICmpInst& IC) {
+void Verifier::visitICmpInst(ICmpInst &IC) {
// Check that the operands are the same type
- const Type* Op0Ty = IC.getOperand(0)->getType();
- const Type* Op1Ty = IC.getOperand(1)->getType();
+ const Type *Op0Ty = IC.getOperand(0)->getType();
+ const Type *Op1Ty = IC.getOperand(1)->getType();
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
+ // Check that the predicate is valid.
+ Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
visitInstruction(IC);
}
-void Verifier::visitFCmpInst(FCmpInst& FC) {
+void Verifier::visitFCmpInst(FCmpInst &FC) {
// Check that the operands are the same type
- const Type* Op0Ty = FC.getOperand(0)->getType();
- const Type* Op1Ty = FC.getOperand(1)->getType();
+ const Type *Op0Ty = FC.getOperand(0)->getType();
+ const Type *Op1Ty = FC.getOperand(1)->getType();
Assert1(Op0Ty == Op1Ty,
"Both operands to FCmp instruction are not of the same type!", &FC);
// Check that the operands are the right type
Assert1(Op0Ty->isFPOrFPVectorTy(),
"Invalid operand types for FCmp instruction", &FC);
+ // Check that the predicate is valid.
+ Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
+
visitInstruction(FC);
}
@@ -1310,27 +1325,6 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
SV.getOperand(2)),
"Invalid shufflevector operands!", &SV);
-
- const VectorType *VTy = dyn_cast<VectorType>(SV.getOperand(0)->getType());
- Assert1(VTy, "Operands are not a vector type", &SV);
-
- // Check to see if Mask is valid.
- if (const ConstantVector *MV = dyn_cast<ConstantVector>(SV.getOperand(2))) {
- for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
- if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
- Assert1(!CI->uge(VTy->getNumElements()*2),
- "Invalid shufflevector shuffle mask!", &SV);
- } else {
- Assert1(isa<UndefValue>(MV->getOperand(i)),
- "Invalid shufflevector shuffle mask!", &SV);
- }
- }
- } else {
- Assert1(isa<UndefValue>(SV.getOperand(2)) ||
- isa<ConstantAggregateZero>(SV.getOperand(2)),
- "Invalid shufflevector shuffle mask!", &SV);
- }
-
visitInstruction(SV);
}
@@ -1408,10 +1402,6 @@ void Verifier::visitInstruction(Instruction &I) {
"Only PHI nodes may reference their own value!", &I);
}
- // Verify that if this is a terminator that it is at the end of the block.
- if (isa<TerminatorInst>(I))
- Assert1(BB->getTerminator() == &I, "Terminator not at end of block!", &I);
-
// Check that void typed values don't have names
Assert1(!I.getType()->isVoidTy() || !I.hasName(),
"Instruction has a name, but provides a void value!", &I);
@@ -1570,7 +1560,8 @@ void Verifier::VerifyType(const Type *Ty) {
"Function type with invalid parameter type", ElTy, FTy);
VerifyType(ElTy);
}
- } break;
+ break;
+ }
case Type::StructTyID: {
const StructType *STy = cast<StructType>(Ty);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -1579,34 +1570,29 @@ void Verifier::VerifyType(const Type *Ty) {
"Structure type with invalid element type", ElTy, STy);
VerifyType(ElTy);
}
- } break;
- case Type::UnionTyID: {
- const UnionType *UTy = cast<UnionType>(Ty);
- for (unsigned i = 0, e = UTy->getNumElements(); i != e; ++i) {
- const Type *ElTy = UTy->getElementType(i);
- Assert2(UnionType::isValidElementType(ElTy),
- "Union type with invalid element type", ElTy, UTy);
- VerifyType(ElTy);
- }
- } break;
+ break;
+ }
case Type::ArrayTyID: {
const ArrayType *ATy = cast<ArrayType>(Ty);
Assert1(ArrayType::isValidElementType(ATy->getElementType()),
"Array type with invalid element type", ATy);
VerifyType(ATy->getElementType());
- } break;
+ break;
+ }
case Type::PointerTyID: {
const PointerType *PTy = cast<PointerType>(Ty);
Assert1(PointerType::isValidElementType(PTy->getElementType()),
"Pointer type with invalid element type", PTy);
VerifyType(PTy->getElementType());
- } break;
+ break;
+ }
case Type::VectorTyID: {
const VectorType *VTy = cast<VectorType>(Ty);
Assert1(VectorType::isValidElementType(VTy->getElementType()),
"Vector type with invalid element type", VTy);
VerifyType(VTy->getElementType());
- } break;
+ break;
+ }
default:
break;
}
@@ -1832,8 +1818,13 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
// and iPTR. In the verifier, we can not distinguish which case we have so
// allow either case to be legal.
if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
- Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
- EVT::getEVT(PTyp->getElementType()).getEVTString();
+ EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true);
+ if (PointeeVT == MVT::Other) {
+ CheckFailed("Intrinsic has pointer to complex type.");
+ return false;
+ }
+ Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
+ PointeeVT.getEVTString();
} else {
CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
"pointer and a pointer is required.", F);
diff --git a/runtime/libprofile/Makefile b/runtime/libprofile/Makefile
index 15e677951299..4125af60d21a 100644
--- a/runtime/libprofile/Makefile
+++ b/runtime/libprofile/Makefile
@@ -1,10 +1,10 @@
##===- runtime/libprofile/Makefile -------------------------*- Makefile -*-===##
-#
+#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
-#
+#
##===----------------------------------------------------------------------===##
LEVEL = ../..
@@ -16,7 +16,7 @@ endif
SHARED_LIBRARY = 1
LOADABLE_MODULE = 1
LIBRARYNAME = profile_rt
-EXTRA_DIST = exported_symbols.lst
-EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/exported_symbols.lst
+EXTRA_DIST = libprofile.exports
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/libprofile.exports
include $(LEVEL)/Makefile.common
diff --git a/runtime/libprofile/exported_symbols.lst b/runtime/libprofile/libprofile.exports
index f45ff4760189..f45ff4760189 100644
--- a/runtime/libprofile/exported_symbols.lst
+++ b/runtime/libprofile/libprofile.exports
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
index 5d3f67ebe1ae..7555a4c2a9b0 100644
--- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll
+++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
@@ -168,132 +168,132 @@ define void @caller_a(double* %arg_a0,
; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a0
; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a1
; CHECK: NoAlias: double* %noalias_ret_a0, double* %noalias_ret_a1
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; <double*> [#uses=1]
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
; CHECK: ===== Alias Analysis Evaluator Report =====
@@ -302,9 +302,9 @@ define void @caller_a(double* %arg_a0,
; CHECK: 36 may alias responses (30.0%)
; CHECK: 0 must alias responses (0.0%)
; CHECK: Alias Analysis Evaluator Pointer Alias Summary: 70%/30%/0%
-; CHECK: 128 Total ModRef Queries Performed
-; CHECK: 44 no mod/ref responses (34.3%)
+; CHECK: 184 Total ModRef Queries Performed
+; CHECK: 44 no mod/ref responses (23.9%)
; CHECK: 0 mod responses (0.0%)
; CHECK: 0 ref responses (0.0%)
-; CHECK: 84 mod & ref responses (65.6%)
-; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 34%/0%/0%/65%
+; CHECK: 140 mod & ref responses (76.0%)
+; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76%
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index 95f94d096f35..0e0c45c8ad5c 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -1,7 +1,8 @@
-; RUN: opt < %s -aa-eval -print-all-alias-modref-info \
-; RUN: |& grep {MayAlias: double\\* \[%\]p.0.i.0, double\\* \[%\]p3\$}
+; RUN: opt < %s -aa-eval -print-all-alias-modref-info |& FileCheck %s
; PR4267
+; CHECK: MayAlias: double* %p.0.i.0, double* %p3
+
; %p3 is equal to %p.0.i.0 on the second iteration of the loop,
; so MayAlias is needed.
diff --git a/test/Analysis/BasicAA/featuretest.ll b/test/Analysis/BasicAA/featuretest.ll
index 50dc8864ac9b..47d278fab1c2 100644
--- a/test/Analysis/BasicAA/featuretest.ll
+++ b/test/Analysis/BasicAA/featuretest.ll
@@ -1,17 +1,22 @@
; This testcase tests for various features the basicaa test should be able to
; determine, as noted in the comments.
-; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | not grep REMOVE
+; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@Global = external global { i32 }
+declare void @external(i32*)
+
; Array test: Test that operations on one local array do not invalidate
; operations on another array. Important for scientific codes.
;
define i32 @different_array_test(i64 %A, i64 %B) {
%Array1 = alloca i32, i32 100
%Array2 = alloca i32, i32 200
+
+ call void @external(i32* %Array1)
+ call void @external(i32* %Array2)
%pointer = getelementptr i32* %Array1, i64 %A
%val = load i32* %pointer
@@ -22,6 +27,8 @@ define i32 @different_array_test(i64 %A, i64 %B) {
%REMOVE = load i32* %pointer ; redundant with above load
%retval = sub i32 %REMOVE, %val
ret i32 %retval
+; CHECK: @different_array_test
+; CHECK: ret i32 0
}
; Constant index test: Constant indexes into the same array should not
@@ -29,6 +36,8 @@ define i32 @different_array_test(i64 %A, i64 %B) {
;
define i32 @constant_array_index_test() {
%Array = alloca i32, i32 100
+ call void @external(i32* %Array)
+
%P1 = getelementptr i32* %Array, i64 7
%P2 = getelementptr i32* %Array, i64 6
@@ -37,6 +46,8 @@ define i32 @constant_array_index_test() {
%BREMOVE = load i32* %P1
%Val = sub i32 %A, %BREMOVE
ret i32 %Val
+; CHECK: @constant_array_index_test
+; CHECK: ret i32 0
}
; Test that if two pointers are spaced out by a constant getelementptr, that
@@ -48,6 +59,8 @@ define i32 @gep_distance_test(i32* %A) {
%REMOVEv = load i32* %A
%r = sub i32 %REMOVEu, %REMOVEv
ret i32 %r
+; CHECK: @gep_distance_test
+; CHECK: ret i32 0
}
; Test that if two pointers are spaced out by a constant offset, that they
@@ -60,6 +73,8 @@ define i32 @gep_distance_test2({i32,i32}* %A, i64 %distance) {
%REMOVEv = load i32* %A1
%r = sub i32 %REMOVEu, %REMOVEv
ret i32 %r
+; CHECK: @gep_distance_test2
+; CHECK: ret i32 0
}
; Test that we can do funny pointer things and that distance calc will still
@@ -68,16 +83,45 @@ define i32 @gep_distance_test3(i32 * %A) {
%X = load i32* %A
%B = bitcast i32* %A to i8*
%C = getelementptr i8* %B, i64 4
- %Y = load i8* %C
- ret i32 8
+ store i8 42, i8* %C
+ %Y = load i32* %A
+ %R = sub i32 %X, %Y
+ ret i32 %R
+; CHECK: @gep_distance_test3
+; CHECK: ret i32 0
}
; Test that we can disambiguate globals reached through constantexpr geps
define i32 @constexpr_test() {
%X = alloca i32
+ call void @external(i32* %X)
+
%Y = load i32* %X
store i32 5, i32* getelementptr ({ i32 }* @Global, i64 0, i32 0)
%REMOVE = load i32* %X
%retval = sub i32 %Y, %REMOVE
ret i32 %retval
+; CHECK: @constexpr_test
+; CHECK: ret i32 0
+}
+
+
+
+; PR7589
+; These two index expressions are different, this cannot be CSE'd.
+define i16 @zext_sext_confusion(i16* %row2col, i5 %j) nounwind{
+entry:
+ %sum5.cast = zext i5 %j to i64 ; <i64> [#uses=1]
+ %P1 = getelementptr i16* %row2col, i64 %sum5.cast
+ %row2col.load.1.2 = load i16* %P1, align 1 ; <i16> [#uses=1]
+
+ %sum13.cast31 = sext i5 %j to i6 ; <i6> [#uses=1]
+ %sum13.cast = zext i6 %sum13.cast31 to i64 ; <i64> [#uses=1]
+ %P2 = getelementptr i16* %row2col, i64 %sum13.cast
+ %row2col.load.1.6 = load i16* %P2, align 1 ; <i16> [#uses=1]
+
+ %.ret = sub i16 %row2col.load.1.6, %row2col.load.1.2 ; <i16> [#uses=1]
+ ret i16 %.ret
+; CHECK: @zext_sext_confusion
+; CHECK: ret i16 %.ret
}
diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll
index 1ed031224713..eba9599ba07b 100644
--- a/test/Analysis/BasicAA/gep-alias.ll
+++ b/test/Analysis/BasicAA/gep-alias.ll
@@ -117,12 +117,12 @@ define i32 @test7(i32* %p, i64 %i) {
; P[zext(i)] != p[zext(i+1)]
; PR1143
-define i32 @test8(i32* %p, i32 %i) {
- %i1 = zext i32 %i to i64
- %pi = getelementptr i32* %p, i64 %i1
- %i.next = add i32 %i, 1
- %i.next2 = zext i32 %i.next to i64
- %pi.next = getelementptr i32* %p, i64 %i.next2
+define i32 @test8(i32* %p, i16 %i) {
+ %i1 = zext i16 %i to i32
+ %pi = getelementptr i32* %p, i32 %i1
+ %i.next = add i16 %i, 1
+ %i.next2 = zext i16 %i.next to i32
+ %pi.next = getelementptr i32* %p, i32 %i.next2
%x = load i32* %pi
store i32 42, i32* %pi.next
%y = load i32* %pi
diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
new file mode 100644
index 000000000000..12b088b1f651
--- /dev/null
+++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& FileCheck %s
+
+
+; CHECK: Just Ref: call void @ro() <-> call void @f0()
+
+declare void @f0()
+declare void @ro() readonly
+
+define void @test0() {
+ call void @f0()
+ call void @ro()
+ ret void
+}
+
+; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
+; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
+
+declare void @llvm.memset.i64(i8*, i8, i64, i32)
+
+@A = external global i8
+@B = external global i8
+define void @test1() {
+ call void @llvm.memset.i64(i8* @A, i8 0, i64 1, i32 1)
+ call void @llvm.memset.i64(i8* @B, i8 0, i64 1, i32 1)
+ ret void
+}
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index a2aabf135f6f..b9a3c5e58f68 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -123,3 +123,14 @@ define i32 @test5(i8* %P, i32 %Len) {
; CHECK: sub i32 %tmp, %tmp
}
+define i8 @test6(i8* %p, i8* noalias %a) {
+ %x = load i8* %a
+ %t = va_arg i8* %p, float
+ %y = load i8* %a
+ %z = add i8 %x, %y
+ ret i8 %z
+; CHECK: @test6
+; CHECK: load i8* %a
+; CHECK-NOT: load
+; CHECK: ret
+}
diff --git a/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll
new file mode 100644
index 000000000000..218b4375f70c
--- /dev/null
+++ b/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll
@@ -0,0 +1,20 @@
+; RUN: opt -regions %s
+define i32 @main() nounwind {
+entry:
+ br label %for.cond
+
+test:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ br i1 true, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret i32 0
+}
diff --git a/test/Analysis/RegionInfo/block_sort.ll b/test/Analysis/RegionInfo/block_sort.ll
new file mode 100644
index 000000000000..faec45a911f5
--- /dev/null
+++ b/test/Analysis/RegionInfo/block_sort.ll
@@ -0,0 +1,42 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats -analyze < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @BZ2_blockSort() nounwind {
+start:
+ br label %while
+
+while:
+ br label %while.body134.i.i
+
+while.body134.i.i:
+ br i1 1, label %end, label %w
+
+w:
+ br label %if.end140.i.i
+
+if.end140.i.i:
+ br i1 1, label %while.end186.i.i, label %if.end183.i.i
+
+if.end183.i.i:
+ br label %while.body134.i.i
+
+while.end186.i.i:
+ br label %while
+
+end:
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] start => <Function Return>
+; CHECK: [1] while => end
+
+; STAT: 2 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: start, while, while.body134.i.i, end, w, if.end140.i.i, while.end186.i.i, if.end183.i.i,
+; BBIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i,
+
+; RNIT: start, while => end, end,
+; RNIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i,
diff --git a/test/Analysis/RegionInfo/cond_loop.ll b/test/Analysis/RegionInfo/cond_loop.ll
new file mode 100644
index 000000000000..2ce57c3c5f37
--- /dev/null
+++ b/test/Analysis/RegionInfo/cond_loop.ll
@@ -0,0 +1,33 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+5:
+ br label %"0"
+
+0:
+ br label %"1"
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ ret void
+3:
+ br i1 1, label %"1", label %"4"
+4:
+ br label %"0"
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] 5 => <Function Return>
+; CHECK: [1] 0 => 2
+
+; STAT: 2 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: 5, 0, 1, 2, 3, 4,
+; BBIT: 0, 1, 3, 4,
+
+; RNIT: 5, 0 => 2, 2,
+; RNIT: 0, 1, 3, 4,
diff --git a/test/Analysis/RegionInfo/condition_complicated.ll b/test/Analysis/RegionInfo/condition_complicated.ll
new file mode 100644
index 000000000000..7ca5c7c7b537
--- /dev/null
+++ b/test/Analysis/RegionInfo/condition_complicated.ll
@@ -0,0 +1,60 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define internal fastcc zeroext i8 @handle_compress() nounwind {
+end165:
+ br i1 1, label %false239, label %true181
+
+true181:
+ br i1 1, label %then187, label %else232
+
+then187:
+ br label %end265
+
+else232:
+ br i1 1, label %false239, label %then245
+
+false239:
+ br i1 1, label %then245, label %else259
+
+then245:
+ br i1 1, label %then251, label %end253
+
+then251:
+ br label %end253
+
+end253:
+ br label %end265
+
+else259:
+ br label %end265
+
+end265:
+ br i1 1, label %then291, label %end298
+
+then291:
+ br label %end298
+
+end298:
+ ret i8 1
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] end165 => <Function Return>
+; CHECK-NEXT: [1] end165 => end265
+; CHECK-NEXT: [2] then245 => end253
+; CHECK-NEXT: [1] end265 => end298
+
+; STAT: 4 region - The # of regions
+
+; BBIT: end165, false239, then245, then251, end253, end265, then291, end298, else259, true181, then187, else232,
+; BBIT: end165, false239, then245, then251, end253, else259, true181, then187, else232,
+; BBIT: then245, then251,
+; BBIT: end265, then291,
+
+; RNIT: end165 => end265, end265 => end298, end298,
+; RNIT: end165, false239, then245 => end253, end253, else259, true181, then187, else232,
+; RNIT: then245, then251,
+; RNIT: end265, then291,
diff --git a/test/Analysis/RegionInfo/condition_complicated_2.ll b/test/Analysis/RegionInfo/condition_complicated_2.ll
new file mode 100644
index 000000000000..5fa940a61ef6
--- /dev/null
+++ b/test/Analysis/RegionInfo/condition_complicated_2.ll
@@ -0,0 +1,44 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define internal fastcc void @compress() nounwind {
+end33:
+ br i1 1, label %end124, label %lor.lhs.false95
+
+lor.lhs.false95:
+ br i1 1, label %then107, label %end172
+
+then107:
+ br i1 1, label %end124, label %then113
+
+then113:
+ br label %end124
+
+end124:
+ br label %exit
+
+end172:
+ br label %exit
+
+
+exit:
+ unreachable
+
+
+}
+; CHECK-NOT: =>
+; CHECK: [0] end33 => <Function Return>
+; CHECK-NEXT: [1] end33 => exit
+; CHECK-NEXT: [2] then107 => end124
+
+; STAT: 3 region - The # of regions
+
+; BBIT: end33, end124, exit, lor.lhs.false95, then107, then113, end172,
+; BBIT: end33, end124, lor.lhs.false95, then107, then113, end172,
+; BBIT: then107, then113,
+
+; RNIT: end33 => exit, exit,
+; RNIT: end33, end124, lor.lhs.false95, then107 => end124, end172,
+; RNIT: then107, then113,
diff --git a/test/Analysis/RegionInfo/condition_forward_edge.ll b/test/Analysis/RegionInfo/condition_forward_edge.ll
new file mode 100644
index 000000000000..098c9b6b4613
--- /dev/null
+++ b/test/Analysis/RegionInfo/condition_forward_edge.ll
@@ -0,0 +1,26 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"1"
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ br label %"3"
+3:
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK: [1] 1 => 3
+
+; STAT: 2 region - The # of regions
+
+; BBIT: 0, 1, 2, 3,
+; BBIT: 1, 2,
+
+; RNIT: 0, 1 => 3, 3,
+; RNIT: 1, 2,
diff --git a/test/Analysis/RegionInfo/condition_same_exit.ll b/test/Analysis/RegionInfo/condition_same_exit.ll
new file mode 100644
index 000000000000..1b88596c0f8c
--- /dev/null
+++ b/test/Analysis/RegionInfo/condition_same_exit.ll
@@ -0,0 +1,31 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+0:
+ br i1 1, label %"1", label %"4"
+
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ br label %"4"
+3:
+ br label %"4"
+4:
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 0 => 4
+; CHECK-NEXT: [2] 1 => 4
+; STAT: 3 region - The # of regions
+
+; BBIT: 0, 1, 2, 4, 3,
+; BBIT: 0, 1, 2, 3,
+; BBIT: 1, 2, 3,
+
+; RNIT: 0 => 4, 4,
+; RNIT: 0, 1 => 4,
+; RNIT: 1, 2, 3,
diff --git a/test/Analysis/RegionInfo/condition_simple.ll b/test/Analysis/RegionInfo/condition_simple.ll
new file mode 100644
index 000000000000..19b154b6476b
--- /dev/null
+++ b/test/Analysis/RegionInfo/condition_simple.ll
@@ -0,0 +1,28 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"1"
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ br label %"4"
+3:
+ br label %"4"
+4:
+ ret void
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 1 => 4
+; STAT: 2 region - The # of regions
+
+; BBIT: 0, 1, 2, 4, 3,
+; BBIT: 1, 2, 3,
+
+; RNIT: 0, 1 => 4, 4,
+; RNIT: 1, 2, 3,
diff --git a/test/Transforms/ABCD/dg.exp b/test/Analysis/RegionInfo/dg.exp
index f2005891a59a..f2005891a59a 100644
--- a/test/Transforms/ABCD/dg.exp
+++ b/test/Analysis/RegionInfo/dg.exp
diff --git a/test/Analysis/RegionInfo/exit_in_condition.ll b/test/Analysis/RegionInfo/exit_in_condition.ll
new file mode 100644
index 000000000000..3b152d2f565d
--- /dev/null
+++ b/test/Analysis/RegionInfo/exit_in_condition.ll
@@ -0,0 +1,38 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define internal fastcc zeroext i8 @handle_compress() nounwind {
+entry:
+ br label %outer
+
+outer:
+ br label %body
+
+body:
+ br i1 1, label %body.i, label %if.end
+
+body.i:
+ br i1 1, label %end, label %if.end
+
+if.end:
+ br label %if.then64
+
+if.then64:
+ br label %outer
+
+end:
+ ret i8 1
+}
+; CHECK-NOT: =>
+; CHECK: [0] entry => <Function Return>
+; CHECK-NEXT: [1] outer => end
+; STAT: 2 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: entry, outer, body, body.i, end, if.end, if.then64,
+; BBIT: outer, body, body.i, if.end, if.then64,
+
+; RNIT: entry, outer => end, end,
+; RNIT: outer, body, body.i, if.end, if.then64,
diff --git a/test/Analysis/RegionInfo/infinite_loop.ll b/test/Analysis/RegionInfo/infinite_loop.ll
new file mode 100644
index 000000000000..59cead492619
--- /dev/null
+++ b/test/Analysis/RegionInfo/infinite_loop.ll
@@ -0,0 +1,20 @@
+; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"1"
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ br label %"2"
+3:
+ br label %"4"
+4:
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK: [1] 1 => 4
+; STAT: 2 region - The # of regions
+; STAT: 1 region - The # of simple regions
diff --git a/test/Analysis/RegionInfo/infinite_loop_2.ll b/test/Analysis/RegionInfo/infinite_loop_2.ll
new file mode 100644
index 000000000000..80c69b7ab2e2
--- /dev/null
+++ b/test/Analysis/RegionInfo/infinite_loop_2.ll
@@ -0,0 +1,36 @@
+; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"1"
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ br label %"5"
+5:
+ br i1 1, label %"11", label %"12"
+11:
+ br label %"6"
+12:
+ br label %"6"
+6:
+ br label %"2"
+3:
+ br label %"4"
+4:
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK: [1] 1 => 3
+; STAT: 2 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: 0, 1, 2, 5, 11, 6, 12, 3, 4,
+; BBIT: 1, 2, 5, 11, 6, 12,
+
+; RNIT: 0, 1 => 3, 3, 4,
+; RNIT: 1, 2, 5, 11, 6, 12,
diff --git a/test/Analysis/RegionInfo/infinite_loop_3.ll b/test/Analysis/RegionInfo/infinite_loop_3.ll
new file mode 100644
index 000000000000..74ceafb84955
--- /dev/null
+++ b/test/Analysis/RegionInfo/infinite_loop_3.ll
@@ -0,0 +1,52 @@
+; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"7"
+7:
+ br i1 1, label %"1", label %"8"
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ br label %"5"
+5:
+ br i1 1, label %"11", label %"12"
+11:
+ br label %"6"
+12:
+ br label %"6"
+6:
+ br label %"2"
+8:
+ br label %"9"
+9:
+ br i1 1, label %"13", label %"14"
+13:
+ br label %"10"
+14:
+ br label %"10"
+10:
+ br label %"8"
+3:
+ br label %"4"
+4:
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 1 => 3
+; CHECK-NEXT: [1] 7 => 1
+; STAT: 3 region - The # of regions
+; STAT: 2 region - The # of simple regions
+
+; BBIT: 0, 7, 1, 2, 5, 11, 6, 12, 3, 4, 8, 9, 13, 10, 14,
+; BBIT: 7, 8, 9, 13, 10, 14,
+; BBIT: 1, 2, 5, 11, 6, 12,
+
+; RNIT: 0, 7 => 1, 1 => 3, 3, 4,
+; RNIT: 7, 8, 9, 13, 10, 14,
+; RNIT: 1, 2, 5, 11, 6, 12,
diff --git a/test/Analysis/RegionInfo/infinite_loop_4.ll b/test/Analysis/RegionInfo/infinite_loop_4.ll
new file mode 100644
index 000000000000..fd56af1d3b8c
--- /dev/null
+++ b/test/Analysis/RegionInfo/infinite_loop_4.ll
@@ -0,0 +1,48 @@
+; RUN: opt -regions -analyze < %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"7"
+7:
+ br i1 1, label %"1", label %"8"
+1:
+ br i1 1, label %"2", label %"3"
+2:
+ br label %"5"
+5:
+ br i1 1, label %"11", label %"12"
+11:
+ br label %"6"
+12:
+ br label %"6"
+6:
+ br i1 1, label %"2", label %"10"
+8:
+ br label %"9"
+9:
+ br i1 1, label %"13", label %"14"
+13:
+ br label %"10"
+14:
+ br label %"10"
+10:
+ br label %"8"
+3:
+ br label %"4"
+4:
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 7 => 3
+; STAT: 2 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: 0, 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, 3, 4,
+; BBIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12,
+
+; RNIT: 0, 7 => 3, 3, 4,
+; RNIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12,
diff --git a/test/Analysis/RegionInfo/loop_with_condition.ll b/test/Analysis/RegionInfo/loop_with_condition.ll
new file mode 100644
index 000000000000..d1d68982eec6
--- /dev/null
+++ b/test/Analysis/RegionInfo/loop_with_condition.ll
@@ -0,0 +1,46 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition() nounwind {
+0:
+ br label %"1"
+1:
+ br i1 1, label %"6", label %"2"
+2:
+ br i1 1, label %"3", label %"4"
+3:
+ br label %"5"
+4:
+ br label %"5"
+5:
+ br label %"8"
+8:
+ br i1 1, label %"7", label %"9"
+9:
+ br label %"2"
+7:
+ br label %"6"
+6:
+ ret void
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 1 => 6
+; CHECK-NEXT: [2] 2 => 7
+; CHECK-NEXT: [3] 2 => 5
+; STAT: 4 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: 0, 1, 6, 2, 3, 5, 8, 7, 9, 4,
+; BBIT: 1, 2, 3, 5, 8, 7, 9, 4,
+; BBIT: 2, 3, 5, 8, 9, 4,
+; BBIT: 2, 3, 4,
+
+; RNIT: 0, 1 => 6, 6,
+; RNIT: 1, 2 => 7, 7,
+; RNIT: 2 => 5, 5, 8, 9,
+; RNIT: 2, 3, 4,
diff --git a/test/Analysis/RegionInfo/loops_1.ll b/test/Analysis/RegionInfo/loops_1.ll
new file mode 100644
index 000000000000..d4bf3cc50118
--- /dev/null
+++ b/test/Analysis/RegionInfo/loops_1.ll
@@ -0,0 +1,40 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define internal fastcc zeroext i8 @loops_1() nounwind {
+entry:
+ br i1 1, label %outer , label %a
+
+a:
+ br label %body
+
+outer:
+ br label %body
+
+body:
+ br i1 1, label %land, label %if
+
+land:
+ br i1 1, label %exit, label %end
+
+exit:
+ br i1 1, label %if, label %end
+
+if:
+ br label %outer
+
+end:
+ ret i8 1
+}
+; CHECK-NOT: =>
+; CHECK: [0] entry => <Function Return>
+; CHECK-NEXT: [1] entry => end
+; STAT: 2 region - The # of regions
+
+; BBIT: entry, outer, body, land, exit, if, end, a,
+; BBIT: entry, outer, body, land, exit, if, a,
+
+; RNIT: entry => end, end,
+; RNIT: entry, outer, body, land, exit, if, a,
diff --git a/test/Analysis/RegionInfo/loops_2.ll b/test/Analysis/RegionInfo/loops_2.ll
new file mode 100644
index 000000000000..07aa7c311010
--- /dev/null
+++ b/test/Analysis/RegionInfo/loops_2.ll
@@ -0,0 +1,49 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @meread_() nounwind {
+entry:
+ br label %bb23
+
+bb23:
+ br label %bb.i
+
+bb.i: ; preds = %bb.i, %bb54
+ br label %pflini_.exit
+
+pflini_.exit: ; preds = %bb.i
+ br label %bb58thread-split
+
+bb58thread-split: ; preds = %bb64, %bb61, %pflini_.exit
+ br label %bb58
+
+bb58: ; preds = %bb60, %bb58thread-split
+ br i1 1, label %bb59, label %bb23
+
+bb59: ; preds = %bb58
+ switch i32 1, label %bb60 [
+ i32 1, label %l98
+ ]
+
+bb60: ; preds = %bb59
+ br i1 1, label %bb61, label %bb58
+
+bb61: ; preds = %bb60
+ br label %bb58thread-split
+
+l98: ; preds = %bb69, %bb59
+ ret void
+}
+; CHECK-NOT: =>
+; CHECK: [0] entry => <Function Return>
+; CHECK: [1] bb23 => l98
+; STAT: 2 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: entry, bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, l98,
+; BBIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61,
+
+; RNIT: entry, bb23 => l98, l98,
+; RNIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61,
diff --git a/test/Analysis/RegionInfo/mix_1.ll b/test/Analysis/RegionInfo/mix_1.ll
new file mode 100644
index 000000000000..829c157c2c68
--- /dev/null
+++ b/test/Analysis/RegionInfo/mix_1.ll
@@ -0,0 +1,69 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @a_linear_impl_fig_1() nounwind {
+0:
+
+ br i1 1, label %"1", label %"15"
+1:
+ switch i32 0, label %"2" [ i32 0, label %"3"
+ i32 1, label %"7"]
+2:
+ br label %"4"
+3:
+ br label %"5"
+4:
+ br label %"6"
+5:
+ br label %"6"
+6:
+ br label %"7"
+7:
+ br label %"15"
+15:
+ br label %"8"
+8:
+ br label %"16"
+16:
+ br label %"9"
+9:
+ br i1 1, label %"10", label %"11"
+11:
+ br i1 1, label %"13", label %"12"
+13:
+ br label %"14"
+12:
+ br label %"14"
+14:
+ br label %"8"
+10:
+ br label %"17"
+17:
+ br label %"18"
+18:
+ ret void
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 0 => 15
+; CHECK-NEXT: [2] 1 => 7
+; CHECK-NEXT: [1] 8 => 10
+; CHECK-NEXT: [2] 11 => 14
+; STAT: 5 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: 0, 1, 2, 4, 6, 7, 15, 8, 16, 9, 10, 17, 18, 11, 13, 14, 12, 3, 5,
+; BBIT: 0, 1, 2, 4, 6, 7, 3, 5,
+; BBIT: 1, 2, 4, 6, 3, 5,
+; BBIT: 8, 16, 9, 11, 13, 14, 12,
+; BBIT: 11, 13, 12,
+
+; RNIT: 0 => 15, 15, 8 => 10, 10, 17, 18,
+; RNIT: 0, 1 => 7, 7,
+; RNIT: 1, 2, 4, 6, 3, 5,
+; RNIT: 8, 16, 9, 11 => 14, 14,
+; RNIT: 11, 13, 12,
diff --git a/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/test/Analysis/RegionInfo/multiple_exiting_edge.ll
new file mode 100644
index 000000000000..7bc0e4607d68
--- /dev/null
+++ b/test/Analysis/RegionInfo/multiple_exiting_edge.ll
@@ -0,0 +1,38 @@
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @normal_condition_0() nounwind {
+bb38: ; preds = %bb34, %bb34, %bb37
+ switch i32 undef, label %bb42 [
+ i32 67, label %bb42
+ i32 90, label %bb41
+ ]
+bb41: ; preds = %bb38
+ br label %bb42
+bb42: ; preds = %bb38, %bb38, %bb41
+ ret void
+}
+
+; BBIT: bb38, bb42, bb41,
+; BBIT: bb38, bb41,
+
+; RNIT: bb38 => bb42, bb42,
+; RNIT: bb38, bb41,
+
+define void @normal_condition_1() nounwind {
+bb38: ; preds = %bb34, %bb34, %bb37
+ switch i32 undef, label %bb41 [
+ i32 67, label %bb42
+ i32 90, label %bb42
+ ]
+bb41: ; preds = %bb38
+ br label %bb42
+bb42: ; preds = %bb38, %bb38, %bb41
+ ret void
+}
+
+; BBIT: bb38, bb41, bb42,
+; BBIT: bb38, bb41,
+
+; RNIT: bb38 => bb42, bb42,
+; RNIT: bb38, bb41,
diff --git a/test/Analysis/RegionInfo/nested_loops.ll b/test/Analysis/RegionInfo/nested_loops.ll
new file mode 100644
index 000000000000..9d8c4558f049
--- /dev/null
+++ b/test/Analysis/RegionInfo/nested_loops.ll
@@ -0,0 +1,33 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define internal fastcc zeroext i8 @handle_compress() nounwind {
+entry:
+ br label %outer
+
+outer:
+ br label %body
+
+body:
+ br i1 1, label %exit172, label %end
+
+exit172:
+ br i1 1, label %end, label %outer
+
+end:
+ ret i8 1
+}
+; CHECK-NOT: =>
+; CHECK: [0] entry => <Function Return>
+; CHECK-NEXT: [1] outer => end
+
+; STAT: 2 region - The # of regions
+
+; BBIT: entry, outer, body, exit172, end,
+; BBIT: outer, body, exit172,
+
+; RNIT: entry, outer => end, end,
+; RNIT: outer, body, exit172,
diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll
new file mode 100644
index 000000000000..d986387099c3
--- /dev/null
+++ b/test/Analysis/RegionInfo/next.ll
@@ -0,0 +1,49 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @MAIN__() nounwind {
+entry:
+ br label %__label_002001.outer
+
+__label_002001.outer: ; preds = %bb236, %bb92
+ br label %__label_002001
+
+__label_002001: ; preds = %bb229, %__label_002001.outer
+ br i1 1, label %bb93, label %__label_000020
+
+bb93: ; preds = %__label_002001
+ br i1 1, label %__label_000020, label %bb197
+
+bb197: ; preds = %bb193
+ br i1 1, label %bb229, label %bb224
+
+bb224: ; preds = %bb223, %bb227
+ br i1 1, label %bb229, label %bb224
+
+bb229: ; preds = %bb227, %bb223
+ br i1 1, label %__label_002001, label %__label_002001.outer
+
+__label_000020: ; preds = %__label_002001, %bb194
+ ret void
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] entry => <Function Return>
+; CHECK-NEXT: [1] __label_002001.outer => __label_000020
+; CHECK-NEXT; [2] bb197 => bb229
+; CHECK-NEXT; [3] bb224 => bb229
+
+; STAT: 4 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: entry, __label_002001.outer, __label_002001, bb93, __label_000020, bb197, bb229, bb224,
+; BBIT: __label_002001.outer, __label_002001, bb93, bb197, bb229, bb224,
+; BBIT: bb197, bb224,
+; BBIT: bb224,
+
+; RNIT: entry, __label_002001.outer => __label_000020, __label_000020,
+; RNIT: __label_002001.outer, __label_002001, bb93, bb197 => bb229, bb229,
+; RNIT: bb197, bb224 => bb229,
+; RNIT: bb224,
diff --git a/test/Analysis/RegionInfo/paper.ll b/test/Analysis/RegionInfo/paper.ll
new file mode 100644
index 000000000000..00b544bc6919
--- /dev/null
+++ b/test/Analysis/RegionInfo/paper.ll
@@ -0,0 +1,55 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define void @a_linear_impl_fig_1() nounwind {
+0:
+ br label %"1"
+1:
+ br label %"2"
+2:
+ br label %"3"
+3:
+ br i1 1, label %"13", label %"4"
+4:
+ br i1 1, label %"5", label %"1"
+5:
+ br i1 1, label %"8", label %"6"
+6:
+ br i1 1, label %"7", label %"4"
+7:
+ ret void
+8:
+ br i1 1, label %"9", label %"1"
+9:
+ br label %"10"
+10:
+ br i1 1, label %"12", label %"11"
+11:
+ br i1 1, label %"9", label %"8"
+13:
+ br i1 1, label %"2", label %"1"
+12:
+ switch i32 0, label %"1" [ i32 0, label %"9"
+ i32 1, label %"8"]
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] 0 => <Function Return>
+; CHECK-NEXT: [1] 1 => 7
+; CHECK-NEXT: [2] 1 => 4
+; CHECK-NEXT: [2] 8 => 1
+
+; STAT: 4 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: 0, 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, 7,
+; BBIT: 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6,
+; BBIT: 1, 2, 3, 13,
+; BBIT: 8, 9, 10, 12, 11,
+
+; RNIT: 0, 1 => 7, 7,
+; RNIT: 1 => 4, 4, 5, 8 => 1, 6,
+; RNIT: 1, 2, 3, 13,
+; RNIT: 8, 9, 10, 12, 11,
diff --git a/test/Analysis/RegionInfo/two_loops_same_header.ll b/test/Analysis/RegionInfo/two_loops_same_header.ll
new file mode 100644
index 000000000000..a97182b81a24
--- /dev/null
+++ b/test/Analysis/RegionInfo/two_loops_same_header.ll
@@ -0,0 +1,46 @@
+; RUN: opt -regions -analyze < %s | FileCheck %s
+; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s
+; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s
+; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s
+
+define internal fastcc zeroext i8 @handle_compress() nounwind {
+entry:
+ br label %outer
+
+outer:
+ br label %body
+
+body:
+ br i1 1, label %else, label %true77
+
+true77:
+ br i1 1, label %then83, label %else
+
+then83:
+ br label %outer
+
+else:
+ br label %else106
+
+else106:
+ br i1 1, label %end, label %outer
+
+end:
+ ret i8 1
+}
+
+; CHECK-NOT: =>
+; CHECK: [0] entry => <Function Return>
+; CHECK-NEXT: [1] outer => end
+; CHECK-NEXT: [2] outer => else
+
+; STAT: 3 region - The # of regions
+; STAT: 1 region - The # of simple regions
+
+; BBIT: entry, outer, body, else, else106, end, true77, then83,
+; BBIT: outer, body, else, else106, true77, then83,
+; BBIT: outer, body, true77, then83,
+
+; RNIT: entry, outer => end, end,
+; RNIT: outer => else, else, else106,
+; RNIT: outer, body, true77, then83,
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index 0bc9ce8241a8..89e8b983c0c0 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -indvars -S > %t
; RUN: grep select %t | count 2
-; RUN: grep {icmp ne i32.\* %w } %t
+; RUN: grep {icmp ne i32.\* } %t
; Indvars should be able to insert a canonical induction variable
; for the bb6 loop without using a maximum calculation (icmp, select)
diff --git a/test/Analysis/ScalarEvolution/max-trip-count.ll b/test/Analysis/ScalarEvolution/max-trip-count.ll
index a8966be4ccd4..843fb073087c 100644
--- a/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -1,8 +1,9 @@
-; RUN: opt < %s -analyze -scalar-evolution \
-; RUN: | grep {\{%d,+,\[^\{\}\]\*\}<%bb>}
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
; ScalarEvolution should be able to understand the loop and eliminate the casts.
+; CHECK: {%d,+,sizeof(i32)}
+
define void @foo(i32* nocapture %d, i32 %n) nounwind {
entry:
%0 = icmp sgt i32 %n, 0 ; <i1> [#uses=1]
@@ -32,3 +33,40 @@ bb1.return_crit_edge: ; preds = %bb1
return: ; preds = %bb1.return_crit_edge, %entry
ret void
}
+
+; ScalarEvolution should be able to find the maximum tripcount
+; of this multiple-exit loop, and if it doesn't know the exact
+; count, it should say so.
+
+; PR7845
+; CHECK: Loop %for.cond: <multiple exits> Unpredictable backedge-taken count.
+; CHECK: Loop %for.cond: max backedge-taken count is 5
+
+@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=2]
+
+define i32 @main() nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %g_4.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] ; <i32> [#uses=5]
+ %cmp = icmp slt i32 %g_4.0, 5 ; <i1> [#uses=1]
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %conv = trunc i32 %g_4.0 to i16 ; <i16> [#uses=1]
+ %tobool.not = icmp eq i16 %conv, 0 ; <i1> [#uses=1]
+ %tobool3 = icmp ne i32 %g_4.0, 0 ; <i1> [#uses=1]
+ %or.cond = and i1 %tobool.not, %tobool3 ; <i1> [#uses=1]
+ br i1 %or.cond, label %for.end, label %for.inc
+
+for.inc: ; preds = %for.body
+ %add = add nsw i32 %g_4.0, 1 ; <i32> [#uses=1]
+ br label %for.cond
+
+for.end: ; preds = %for.body, %for.cond
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
diff --git a/test/Archive/README.txt b/test/Archive/README.txt
index da6cfa4c9ed0..6810befc5857 100644
--- a/test/Archive/README.txt
+++ b/test/Archive/README.txt
@@ -5,7 +5,7 @@ This directory contains various tests of llvm-ar and llvm-ranlib to ensure
compatibility reading other ar(1) formats. It also provides a basic
functionality test for these tools.
-There are four archives stored in CVS with these tests:
+There are four archives accompanying these tests:
GNU.a - constructed on Linux with GNU ar
MacOSX.a - constructed on Mac OS X with its native BSD4.4 ar
diff --git a/test/Assembler/2010-01-06-UnionType.ll b/test/Assembler/2010-01-06-UnionType.ll
deleted file mode 100644
index 37130d66088d..000000000000
--- a/test/Assembler/2010-01-06-UnionType.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-%X = type union { i32, i32* }
diff --git a/test/Assembler/align-inst-alloca.ll b/test/Assembler/align-inst-alloca.ll
new file mode 100644
index 000000000000..0343bebf1876
--- /dev/null
+++ b/test/Assembler/align-inst-alloca.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as %s -o /dev/null 2>/dev/null
+
+define void @foo() {
+ %p = alloca i1, align 1073741824
+ ret void
+}
diff --git a/test/Assembler/align-inst-load.ll b/test/Assembler/align-inst-load.ll
new file mode 100644
index 000000000000..3586be2d6e03
--- /dev/null
+++ b/test/Assembler/align-inst-load.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as %s -o /dev/null 2>/dev/null
+
+define void @foo() {
+ load i1* %p, align 1073741824
+ ret void
+}
diff --git a/test/Assembler/align-inst-store.ll b/test/Assembler/align-inst-store.ll
new file mode 100644
index 000000000000..8c3b7124b437
--- /dev/null
+++ b/test/Assembler/align-inst-store.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as %s -o /dev/null 2>/dev/null
+
+define void @foo() {
+ store i1 false, i1* %p, align 1073741824
+ ret void
+}
diff --git a/test/Assembler/align-inst.ll b/test/Assembler/align-inst.ll
new file mode 100644
index 000000000000..6f7100e065d3
--- /dev/null
+++ b/test/Assembler/align-inst.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as %s -o /dev/null
+
+@A = global i1 0, align 536870912
+
+define void @foo() {
+ %p = alloca i1, align 536870912
+ load i1* %p, align 536870912
+ store i1 false, i1* %p, align 536870912
+ ret void
+}
diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll
new file mode 100644
index 000000000000..fe23d26fbeb4
--- /dev/null
+++ b/test/Assembler/comment.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llvm-dis -show-annotations | FileCheck -check-prefix=ANNOT %s
+; RUN: llvm-as < %s | llvm-dis | FileCheck -check-prefix=BARE %s
+
+; The bare version of this file should not have any #uses lines.
+; BARE: @B =
+; BARE-NOT: #uses
+; BARE: }
+
+@B = external global i32
+; ANNOT: @B = external global i32 ; [#uses=0]
+
+define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+ %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1]
+ ret <4 x i1> %cmp
+}
+
+; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1]
+
+
diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll
index 803d6d343063..ebef58ff459c 100644
--- a/test/Assembler/getelementptr.ll
+++ b/test/Assembler/getelementptr.ll
@@ -3,9 +3,9 @@
; Verify that over-indexed getelementptrs are folded.
@A = external global [2 x [3 x [5 x [7 x i32]]]]
@B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 0, i64 0, i64 2, i64 1, i64 7523)
-; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5) ; <i32**> [#uses=0]
+; CHECK: @B = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 36, i64 0, i64 1, i64 0, i64 5)
@C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 3, i64 2, i64 0, i64 0, i64 7523)
-; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5) ; <i32**> [#uses=0]
+; CHECK: @C = global i32* getelementptr ([2 x [3 x [5 x [7 x i32]]]]* @A, i64 39, i64 1, i64 1, i64 4, i64 5)
;; Verify that i16 indices work.
@x = external global {i32, i32}
diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml
index e830106c11ff..bf2178254409 100644
--- a/test/Bindings/Ocaml/analysis.ml
+++ b/test/Bindings/Ocaml/analysis.ml
@@ -1,5 +1,5 @@
(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %s -o %t
- * RUN: ./%t %t.bc
+ * RUN: %t
*)
open Llvm
diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml
index 112ca618301a..30b07d2199f3 100644
--- a/test/Bindings/Ocaml/bitreader.ml
+++ b/test/Bindings/Ocaml/bitreader.ml
@@ -1,5 +1,5 @@
(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t
- * RUN: ./%t %t.bc
+ * RUN: %t %t.bc
* RUN: llvm-dis < %t.bc | grep caml_int_ty
*)
diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml
index ef1c9ab722c8..8eb923ea32c7 100644
--- a/test/Bindings/Ocaml/bitwriter.ml
+++ b/test/Bindings/Ocaml/bitwriter.ml
@@ -1,5 +1,5 @@
(* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t
- * RUN: ./%t %t.bc
+ * RUN: %t %t.bc
* RUN: llvm-dis < %t.bc | grep caml_int_ty
*)
diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml
index 2caeb82aac22..63040e4a33fc 100644
--- a/test/Bindings/Ocaml/executionengine.ml
+++ b/test/Bindings/Ocaml/executionengine.ml
@@ -1,5 +1,5 @@
(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %s -o %t
- * RUN: ./%t %t.bc
+ * RUN: %t
*)
open Llvm
diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml
index f28eff28da75..8a6af012ac24 100644
--- a/test/Bindings/Ocaml/scalar_opts.ml
+++ b/test/Bindings/Ocaml/scalar_opts.ml
@@ -1,4 +1,5 @@
(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %s -o %t
+ * RUN: %t %t.bc
*)
(* Note: It takes several seconds for ocamlopt to link an executable with
@@ -13,8 +14,11 @@ let context = global_context ()
let void_type = Llvm.void_type context
(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
let suite name f =
- prerr_endline (name ^ ":");
+ if print_checkpoints then
+ prerr_endline (name ^ ":");
f ()
diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml
index 3c3b7339fef8..bfaf37ca879d 100644
--- a/test/Bindings/Ocaml/target.ml
+++ b/test/Bindings/Ocaml/target.ml
@@ -1,4 +1,5 @@
(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa %s -o %t
+ * RUN: %t %t.bc
*)
(* Note: It takes several seconds for ocamlopt to link an executable with
@@ -8,13 +9,17 @@
open Llvm
open Llvm_target
+
let context = global_context ()
let i32_type = Llvm.i32_type context
let i64_type = Llvm.i64_type context
(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
let suite name f =
- prerr_endline (name ^ ":");
+ if print_checkpoints then
+ prerr_endline (name ^ ":");
f ()
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index 506bf50e2a49..e55ab9643e43 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -1,5 +1,5 @@
(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %s -o %t
- * RUN: ./%t %t.bc
+ * RUN: %t %t.bc
* RUN: llvm-dis < %t.bc > %t.ll
*)
@@ -296,12 +296,6 @@ let test_constants () =
insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |])
= (type_of c));
- group "union";
- let t = union_type context [| i1_type; i16_type; i64_type; double_type |] in
- let c = const_union t one in
- ignore (define_global "const_union" c m);
- insist (t = (type_of c));
-
(* RUN: grep {const_null.*zeroinit} < %t.ll
*)
group "null";
@@ -436,7 +430,7 @@ let test_constants () =
* RUN: grep {const_select.*select} < %t.ll
* RUN: grep {const_extractelement.*extractelement} < %t.ll
* RUN: grep {const_insertelement.*insertelement} < %t.ll
- * RUN: grep {const_shufflevector.*shufflevector} < %t.ll
+ * RUN: grep {const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>} < %t.ll
*)
ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m);
ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m);
@@ -455,7 +449,8 @@ let test_constants () =
ignore (define_global "const_shufflevector" (const_shufflevector
(const_vector [| zero; one |])
(const_vector [| one; zero |])
- (const_bitcast foldbomb (vector_type i32_type 2))) m);
+ (const_vector [| const_int i32_type 0; const_int i32_type 1;
+ const_int i32_type 2; const_int i32_type 3 |])) m);
group "asm"; begin
let ft = function_type void_type [| i32_type; i32_type; i32_type |] in
@@ -642,11 +637,18 @@ let test_users () =
let p1 = param fn 0 in
let p2 = param fn 1 in
+ let a3 = build_alloca i32_type "user_alloca" b in
+ let p3 = build_load a3 "user_load" b in
let i = build_add p1 p2 "sum" b in
+ insist ((num_operands i) = 2);
insist ((operand i 0) = p1);
insist ((operand i 1) = p2);
+ set_operand i 1 p3;
+ insist ((operand i 1) != p2);
+ insist ((operand i 1) = p3);
+
ignore (build_unreachable b)
@@ -1154,13 +1156,13 @@ let test_builder () =
group "comparisons"; begin
(* RUN: grep {%build_icmp_ne = icmp ne i32 %P1, %P2} < %t.ll
* RUN: grep {%build_icmp_sle = icmp sle i32 %P2, %P1} < %t.ll
- * RUN: grep {%build_icmp_false = fcmp false float %F1, %F2} < %t.ll
- * RUN: grep {%build_icmp_true = fcmp true float %F2, %F1} < %t.ll
+ * RUN: grep {%build_fcmp_false = fcmp false float %F1, %F2} < %t.ll
+ * RUN: grep {%build_fcmp_true = fcmp true float %F2, %F1} < %t.ll
*)
ignore (build_icmp Icmp.Ne p1 p2 "build_icmp_ne" atentry);
ignore (build_icmp Icmp.Sle p2 p1 "build_icmp_sle" atentry);
- ignore (build_fcmp Fcmp.False f1 f2 "build_icmp_false" atentry);
- ignore (build_fcmp Fcmp.True f2 f1 "build_icmp_true" atentry)
+ ignore (build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry);
+ ignore (build_fcmp Fcmp.True f2 f1 "build_fcmp_true" atentry)
end;
group "miscellaneous"; begin
@@ -1229,13 +1231,19 @@ let test_builder () =
group "dbg"; begin
(* RUN: grep {%dbg = add i32 %P1, %P2, !dbg !1} < %t.ll
- * RUN: grep {!1 = metadata !\{i32 2, metadata !"dbg test"\}} < %t.ll
+ * RUN: grep {!1 = metadata !\{i32 2, i32 3, metadata !2, metadata !2\}} < %t.ll
*)
- let m1 = const_int i32_type 2 in
- let m2 = mdstring context "dbg test" in
- let md = mdnode context [| m1; m2 |] in
+ insist ((current_debug_location atentry) = None);
+
+ let m_line = const_int i32_type 2 in
+ let m_col = const_int i32_type 3 in
+ let m_scope = mdnode context [| |] in
+ let m_inlined = mdnode context [| |] in
+ let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in
set_current_debug_location atentry md;
+ insist ((current_debug_location atentry) = Some md);
+
let i = build_add p1 p2 "dbg" atentry in
insist ((has_metadata i) = true);
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll
new file mode 100644
index 000000000000..8a8767337dca
--- /dev/null
+++ b/test/Bitcode/AutoUpgradeGlobals.ll
@@ -0,0 +1,3 @@
+; This isn't really an assembly file. It just runs test on bitcode to ensure
+; it is auto-upgraded.
+; RUN: llvm-dis < %s.bc | not grep {i32 @\\.llvm\\.eh}
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll.bc b/test/Bitcode/AutoUpgradeGlobals.ll.bc
new file mode 100644
index 000000000000..1abe9688e291
--- /dev/null
+++ b/test/Bitcode/AutoUpgradeGlobals.ll.bc
Binary files differ
diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll
new file mode 100644
index 000000000000..272cd424e2a2
--- /dev/null
+++ b/test/Bitcode/neon-intrinsics.ll
@@ -0,0 +1,213 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; vmovls should be auto-upgraded to sext
+
+; CHECK: vmovls8
+; CHECK-NOT: arm.neon.vmovls.v8i16
+; CHECK: sext <8 x i8>
+
+; CHECK: vmovls16
+; CHECK-NOT: arm.neon.vmovls.v4i32
+; CHECK: sext <4 x i16>
+
+; CHECK: vmovls32
+; CHECK-NOT: arm.neon.vmovls.v2i64
+; CHECK: sext <2 x i32>
+
+; vmovlu should be auto-upgraded to zext
+
+; CHECK: vmovlu8
+; CHECK-NOT: arm.neon.vmovlu.v8i16
+; CHECK: zext <8 x i8>
+
+; CHECK: vmovlu16
+; CHECK-NOT: arm.neon.vmovlu.v4i32
+; CHECK: zext <4 x i16>
+
+; CHECK: vmovlu32
+; CHECK-NOT: arm.neon.vmovlu.v2i64
+; CHECK: zext <2 x i32>
+
+; vaddl/vaddw should be auto-upgraded to add with sext/zext
+
+; CHECK: vaddls16
+; CHECK-NOT: arm.neon.vaddls.v4i32
+; CHECK: sext <4 x i16>
+; CHECK-NEXT: sext <4 x i16>
+; CHECK-NEXT: add <4 x i32>
+
+; CHECK: vaddlu32
+; CHECK-NOT: arm.neon.vaddlu.v2i64
+; CHECK: zext <2 x i32>
+; CHECK-NEXT: zext <2 x i32>
+; CHECK-NEXT: add <2 x i64>
+
+; CHECK: vaddws8
+; CHECK-NOT: arm.neon.vaddws.v8i16
+; CHECK: sext <8 x i8>
+; CHECK-NEXT: add <8 x i16>
+
+; CHECK: vaddwu16
+; CHECK-NOT: arm.neon.vaddwu.v4i32
+; CHECK: zext <4 x i16>
+; CHECK-NEXT: add <4 x i32>
+
+; vsubl/vsubw should be auto-upgraded to subtract with sext/zext
+
+; CHECK: vsubls16
+; CHECK-NOT: arm.neon.vsubls.v4i32
+; CHECK: sext <4 x i16>
+; CHECK-NEXT: sext <4 x i16>
+; CHECK-NEXT: sub <4 x i32>
+
+; CHECK: vsublu32
+; CHECK-NOT: arm.neon.vsublu.v2i64
+; CHECK: zext <2 x i32>
+; CHECK-NEXT: zext <2 x i32>
+; CHECK-NEXT: sub <2 x i64>
+
+; CHECK: vsubws8
+; CHECK-NOT: arm.neon.vsubws.v8i16
+; CHECK: sext <8 x i8>
+; CHECK-NEXT: sub <8 x i16>
+
+; CHECK: vsubwu16
+; CHECK-NOT: arm.neon.vsubwu.v4i32
+; CHECK: zext <4 x i16>
+; CHECK-NEXT: sub <4 x i32>
+
+; vmull should be auto-upgraded to multiply with sext/zext
+; (but vmullp should remain an intrinsic)
+
+; CHECK: vmulls8
+; CHECK-NOT: arm.neon.vmulls.v8i16
+; CHECK: sext <8 x i8>
+; CHECK-NEXT: sext <8 x i8>
+; CHECK-NEXT: mul <8 x i16>
+
+; CHECK: vmullu16
+; CHECK-NOT: arm.neon.vmullu.v4i32
+; CHECK: zext <4 x i16>
+; CHECK-NEXT: zext <4 x i16>
+; CHECK-NEXT: mul <4 x i32>
+
+; CHECK: vmullp8
+; CHECK: arm.neon.vmullp.v8i16
+
+; vmlal should be auto-upgraded to multiply/add with sext/zext
+
+; CHECK: vmlals32
+; CHECK-NOT: arm.neon.vmlals.v2i64
+; CHECK: sext <2 x i32>
+; CHECK-NEXT: sext <2 x i32>
+; CHECK-NEXT: mul <2 x i64>
+; CHECK-NEXT: add <2 x i64>
+
+; CHECK: vmlalu8
+; CHECK-NOT: arm.neon.vmlalu.v8i16
+; CHECK: zext <8 x i8>
+; CHECK-NEXT: zext <8 x i8>
+; CHECK-NEXT: mul <8 x i16>
+; CHECK-NEXT: add <8 x i16>
+
+; vmlsl should be auto-upgraded to multiply/sub with sext/zext
+
+; CHECK: vmlsls16
+; CHECK-NOT: arm.neon.vmlsls.v4i32
+; CHECK: sext <4 x i16>
+; CHECK-NEXT: sext <4 x i16>
+; CHECK-NEXT: mul <4 x i32>
+; CHECK-NEXT: sub <4 x i32>
+
+; CHECK: vmlslu32
+; CHECK-NOT: arm.neon.vmlslu.v2i64
+; CHECK: zext <2 x i32>
+; CHECK-NEXT: zext <2 x i32>
+; CHECK-NEXT: mul <2 x i64>
+; CHECK-NEXT: sub <2 x i64>
+
+; vaba should be auto-upgraded to vabd + add
+
+; CHECK: vabas32
+; CHECK-NOT: arm.neon.vabas.v2i32
+; CHECK: arm.neon.vabds.v2i32
+; CHECK-NEXT: add <2 x i32>
+
+; CHECK: vabaQu8
+; CHECK-NOT: arm.neon.vabau.v16i8
+; CHECK: arm.neon.vabdu.v16i8
+; CHECK-NEXT: add <16 x i8>
+
+; vabal should be auto-upgraded to vabd with zext + add
+
+; CHECK: vabals16
+; CHECK-NOT: arm.neon.vabals.v4i32
+; CHECK: arm.neon.vabds.v4i16
+; CHECK-NEXT: zext <4 x i16>
+; CHECK-NEXT: add <4 x i32>
+
+; CHECK: vabalu32
+; CHECK-NOT: arm.neon.vabalu.v2i64
+; CHECK: arm.neon.vabdu.v2i32
+; CHECK-NEXT: zext <2 x i32>
+; CHECK-NEXT: add <2 x i64>
+
+; vabdl should be auto-upgraded to vabd with zext
+
+; CHECK: vabdls8
+; CHECK-NOT: arm.neon.vabdls.v8i16
+; CHECK: arm.neon.vabds.v8i8
+; CHECK-NEXT: zext <8 x i8>
+
+; CHECK: vabdlu16
+; CHECK-NOT: arm.neon.vabdlu.v4i32
+; CHECK: arm.neon.vabdu.v4i16
+; CHECK-NEXT: zext <4 x i16>
+
+; vmovn should be auto-upgraded to trunc
+
+; CHECK: vmovni16
+; CHECK-NOT: arm.neon.vmovn.v8i8
+; CHECK: trunc <8 x i16>
+
+; CHECK: vmovni32
+; CHECK-NOT: arm.neon.vmovn.v4i16
+; CHECK: trunc <4 x i32>
+
+; CHECK: vmovni64
+; CHECK-NOT: arm.neon.vmovn.v2i32
+; CHECK: trunc <2 x i64>
+
+; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1)
+
+; CHECK: vld1i8
+; CHECK: i32 1
+; CHECK: vld2Qi16
+; CHECK: i32 1
+; CHECK: vld3i32
+; CHECK: i32 1
+; CHECK: vld4Qf
+; CHECK: i32 1
+
+; CHECK: vst1i8
+; CHECK: i32 1
+; CHECK: vst2Qi16
+; CHECK: i32 1
+; CHECK: vst3i32
+; CHECK: i32 1
+; CHECK: vst4Qf
+; CHECK: i32 1
+
+; CHECK: vld2laneQi16
+; CHECK: i32 1
+; CHECK: vld3lanei32
+; CHECK: i32 1
+; CHECK: vld4laneQf
+; CHECK: i32 1
+
+; CHECK: vst2laneQi16
+; CHECK: i32 1
+; CHECK: vst3lanei32
+; CHECK: i32 1
+; CHECK: vst4laneQf
+; CHECK: i32 1
diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc
new file mode 100644
index 000000000000..cabc3c934136
--- /dev/null
+++ b/test/Bitcode/neon-intrinsics.ll.bc
Binary files differ
diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll
index 6ad09d2e25cd..18a31eb45d36 100644
--- a/test/BugPoint/crash-narrowfunctiontest.ll
+++ b/test/BugPoint/crash-narrowfunctiontest.ll
@@ -1,6 +1,8 @@
; Test that bugpoint can narrow down the testcase to the important function
+; FIXME: This likely fails on windows
;
-; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
+; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
+; XFAIL: mingw
define i32 @foo() { ret i32 1 }
diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll
new file mode 100644
index 000000000000..f2541ee3f9ac
--- /dev/null
+++ b/test/BugPoint/metadata.ll
@@ -0,0 +1,35 @@
+; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
+; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+; XFAIL: mingw
+
+; Bugpoint should keep the call's metadata attached to the call.
+
+; CHECK: call void @foo(), !dbg !0, !attach !2
+; CHECK: !0 = metadata !{i32 104, i32 105, metadata !1, metadata !1}
+; CHECK: !1 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0}
+; CHECK: !2 = metadata !{metadata !"the call to foo"}
+
+%rust_task = type {}
+define void @test(i32* %a, i8* %b) {
+ %s = mul i8 22, 9, !attach !0, !dbg !10
+ store i8 %s, i8* %b, !attach !1, !dbg !11
+ call void @foo(), !attach !2, !dbg !12
+ store i32 7, i32* %a, !attach !3, !dbg !13
+ %t = add i32 0, 5, !attach !4, !dbg !14
+ ret void
+}
+
+declare void @foo()
+
+!0 = metadata !{metadata !"boring"}
+!1 = metadata !{metadata !"uninteresting"}
+!2 = metadata !{metadata !"the call to foo"}
+!3 = metadata !{metadata !"noise"}
+!4 = metadata !{metadata !"filler"}
+
+!9 = metadata !{i32 0, i32 0, i32 0, metadata !"source.c", metadata !"/dir", metadata !"me", i1 true, i1 false, metadata !"", i32 0}
+!10 = metadata !{i32 100, i32 101, metadata !9, metadata !9}
+!11 = metadata !{i32 102, i32 103, metadata !9, metadata !9}
+!12 = metadata !{i32 104, i32 105, metadata !9, metadata !9}
+!13 = metadata !{i32 106, i32 107, metadata !9, metadata !9}
+!14 = metadata !{i32 108, i32 109, metadata !9, metadata !9}
diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll
index 439ea545468e..791ec69a23d2 100644
--- a/test/BugPoint/remove_arguments_test.ll
+++ b/test/BugPoint/remove_arguments_test.ll
@@ -1,5 +1,7 @@
-; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes
+; FIXME: This likely fails on windows
+; RUN: bugpoint -load %llvmlibsdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes
; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+; XFAIL: mingw
; Test to make sure that arguments are removed from the function if they are
; unnecessary. And clean up any types that that frees up too.
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 433af900dd2b..ad9a2432dbfa 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -24,6 +24,23 @@ endif()
include(FindPythonInterp)
if(PYTHONINTERP_FOUND)
+ get_directory_property(DEFINITIONS COMPILE_DEFINITIONS)
+ foreach(DEF ${DEFINITIONS})
+ set(DEFS "${DEFS} -D${DEF}")
+ endforeach()
+ get_directory_property(INC_DIRS INCLUDE_DIRECTORIES)
+ foreach(INC_DIR ${INC_DIRS})
+ set(IDIRS "${IDIRS} -I${INC_DIR}")
+ endforeach()
+ string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
+ string(REPLACE "<DEFINES>" "${DEFS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+ string(REPLACE "<FLAGS>" "${CMAKE_CXX_FLAGS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+ string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+ string(REGEX REPLACE "<[^>]+>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
+ set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}")
+ if(NOT MSVC)
+ set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++")
+ endif()
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
${CMAKE_CURRENT_BINARY_DIR}/site.exp)
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 81483cb4e7c5..ee63656b26d3 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,11 +1,15 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@A = external global [4 x [4 x i32]] ; <[4 x [4 x i32]]*> [#uses=1]
+; CHECK: dct_luma_sp:
define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
entry:
+; Make sure to use base-updating stores for saving callee-saved registers.
+; CHECK-NOT: sub sp
+; CHECK: vstmdb sp!
%predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
br label %cond_next489
diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
index d741112e2886..76fa3649c880 100644
--- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
@@ -2,7 +2,7 @@
; PR1266
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-linux-gnueabi"
+target triple = "arm-unknown-linux-gnueabi"
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
%struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
index 030486a7c983..7ba2a190be73 100644
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
@@ -2,7 +2,7 @@
; PR1424
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-linux-gnueabi"
+target triple = "arm-unknown-linux-gnueabi"
%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
%struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 }
diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
index 198faebbea6f..f89a5de77b3f 100644
--- a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
+++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
@@ -17,3 +17,17 @@ entry:
store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16
ret void
}
+
+; Radar 8290937: Ignore undef shuffle indices.
+; CHECK: t2
+; CHECK: vtrn.16
+define void @t2(%struct.int16x8x2_t* nocapture %ptr, <4 x i16> %a.0, <4 x i16> %b.0) nounwind {
+entry:
+ %0 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
+ %1 = shufflevector <4 x i16> %a.0, <4 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+ %ptr26.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 0, i32 0
+ store <8 x i16> %0, <8 x i16>* %ptr26.0, align 16
+ %ptr20.1.0 = getelementptr inbounds %struct.int16x8x2_t* %ptr, i32 0, i32 0, i32 1, i32 0
+ store <8 x i16> %1, <8 x i16>* %ptr20.1.0, align 16
+ ret void
+}
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index ff60fa8c49d8..e47c03839375 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -5,32 +5,32 @@
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind {
- %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1]
%tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1]
- %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1]
%tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1]
- %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1]
- %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1]
- %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1]
%tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1]
- %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1]
%tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1]
%tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1]
%tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1]
%tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1]
%tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2]
- call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd)
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1)
%tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1]
%tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1]
%tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1]
@@ -38,8 +38,8 @@ define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A
%tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1]
%tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1]
%tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2]
- call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh)
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1)
%tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1]
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef)
+ tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1)
ret <8 x i8> %tmp4
}
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index ce959d1b91c8..cd1c9c8c0421 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -36,8 +36,8 @@ entry:
%tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
%19 = fmul <4 x float> %tmp5, %2
%20 = bitcast float* %fltp to i8*
- tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19)
+ tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
index e4f20990bed2..6f487962310f 100644
--- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
+++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
@@ -12,8 +12,8 @@ entry:
%tmp9 = trunc i128 %tmp8 to i64 ; <i64> [#uses=1]
%tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1]
%tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1]
- tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i) nounwind
+ tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind
ret void
}
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
index 7650d883d7b1..ac8e80904eda 100755
--- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
+++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=arm -mtriple=armv4t-unknown-linux-gnueabi | FileCheck %s
; PR 7433
+; XFAIL: *
%0 = type { i8*, i8* }
%1 = type { i8*, i8*, i8* }
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index 0c5b180cf846..ffc47ebdf196 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -16,10 +16,10 @@ target triple = "thumbv7-apple-darwin10"
define i32 @test(i8* %arg) nounwind {
entry:
- %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg)
+ %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1)
%1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> <i32 1, i32 2>
store <2 x i64> %1, <2 x i64>* undef, align 16
ret i32 undef
}
-declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
new file mode 100644
index 000000000000..c03c81545946
--- /dev/null
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -0,0 +1,95 @@
+; RUN: llc -enable-correct-eh-support < %s
+; PR7716
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10.0.0"
+
+%0 = type { i8*, i8* }
+%struct.A = type { i32 }
+
+@d = internal global i32 0, align 4 ; <i32*> [#uses=6]
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1]
+@_ZTS1A = internal constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1]
+@_ZTI1A = internal constant %0 { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i32 2) to i8*), i8* getelementptr inbounds ([3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1]
+@.str2 = private constant [18 x i8] c"c == %d, d == %d\0A\00" ; <[18 x i8]*> [#uses=1]
+@.str3 = private constant [16 x i8] c"A(const A&) %d\0A\00" ; <[16 x i8]*> [#uses=1]
+@.str4 = private constant [9 x i8] c"~A() %d\0A\00" ; <[9 x i8]*> [#uses=1]
+@.str5 = private constant [8 x i8] c"A() %d\0A\00" ; <[8 x i8]*> [#uses=1]
+@str = internal constant [14 x i8] c"Throwing 1...\00" ; <[14 x i8]*> [#uses=1]
+@str1 = internal constant [8 x i8] c"Caught.\00" ; <[8 x i8]*> [#uses=1]
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare i8* @__cxa_allocate_exception(i32)
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
+
+declare void @_Unwind_SjLj_Resume(i8*)
+
+define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 {
+entry:
+ %tmp.i = getelementptr inbounds %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1]
+ %tmp2.i = load i32* %tmp.i ; <i32> [#uses=1]
+ %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0]
+ %tmp3.i = load i32* @d ; <i32> [#uses=1]
+ %inc.i = add nsw i32 %tmp3.i, 1 ; <i32> [#uses=1]
+ store i32 %inc.i, i32* @d
+ ret void
+}
+
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+define i32 @main() ssp {
+entry:
+ %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([14 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
+ %exception.i = tail call i8* @__cxa_allocate_exception(i32 4) nounwind ; <i8*> [#uses=2]
+ %tmp2.i.i.i = bitcast i8* %exception.i to i32* ; <i32*> [#uses=1]
+ store i32 1, i32* %tmp2.i.i.i
+ %call.i.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str5, i32 0, i32 0), i32 1) nounwind ; <i32> [#uses=0]
+ invoke void @__cxa_throw(i8* %exception.i, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (void (%struct.A*)* @_ZN1AD1Ev to i8*)) noreturn
+ to label %.noexc unwind label %lpad
+
+.noexc: ; preds = %entry
+ unreachable
+
+try.cont: ; preds = %lpad
+ %0 = tail call i8* @__cxa_get_exception_ptr(i8* %exn) nounwind ; <i8*> [#uses=0]
+ %call.i.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([16 x i8]* @.str3, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
+ %1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0]
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0]
+ %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
+ %tmp3.i.i = load i32* @d ; <i32> [#uses=1]
+ %inc.i.i4 = add nsw i32 %tmp3.i.i, 1 ; <i32> [#uses=1]
+ store i32 %inc.i.i4, i32* @d
+ tail call void @__cxa_end_catch()
+ %tmp13 = load i32* @d ; <i32> [#uses=1]
+ %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0]
+ %tmp16 = load i32* @d ; <i32> [#uses=1]
+ %cmp = icmp ne i32 %tmp16, 2 ; <i1> [#uses=1]
+ %conv = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ ret i32 %conv
+
+lpad: ; preds = %entry
+ %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4]
+ %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1]
+ %2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1]
+ %3 = icmp eq i32 %eh.selector, %2 ; <i1> [#uses=1]
+ br i1 %3, label %try.cont, label %eh.resume
+
+eh.resume: ; preds = %lpad
+ tail call void @_Unwind_SjLj_Resume(i8* %exn) noreturn
+ unreachable
+}
+
+declare i8* @__cxa_get_exception_ptr(i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+declare i32 @puts(i8* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
new file mode 100644
index 000000000000..f57b7e676949
--- /dev/null
+++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10
+; <rdar://problem/8264008>
+
+define linkonce_odr arm_apcscc void @func1() {
+entry:
+ %save_filt.936 = alloca i32 ; <i32*> [#uses=2]
+ %save_eptr.935 = alloca i8* ; <i8**> [#uses=2]
+ %eh_exception = alloca i8* ; <i8**> [#uses=5]
+ %eh_selector = alloca i32 ; <i32*> [#uses=3]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call arm_apcscc void @func2()
+ br label %return
+
+bb: ; No predecessors!
+ %eh_select = load i32* %eh_selector ; <i32> [#uses=1]
+ store i32 %eh_select, i32* %save_filt.936, align 4
+ %eh_value = load i8** %eh_exception ; <i8*> [#uses=1]
+ store i8* %eh_value, i8** %save_eptr.935, align 4
+ invoke arm_apcscc void @func3()
+ to label %invcont unwind label %lpad
+
+invcont: ; preds = %bb
+ %tmp6 = load i8** %save_eptr.935, align 4 ; <i8*> [#uses=1]
+ store i8* %tmp6, i8** %eh_exception, align 4
+ %tmp7 = load i32* %save_filt.936, align 4 ; <i32> [#uses=1]
+ store i32 %tmp7, i32* %eh_selector, align 4
+ br label %Unwind
+
+bb12: ; preds = %ppad
+ call arm_apcscc void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+
+return: ; preds = %entry
+ ret void
+
+lpad: ; preds = %bb
+ %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ store i8* %eh_ptr, i8** %eh_exception
+ %eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1]
+ %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1)
+ store i32 %eh_select14, i32* %eh_selector
+ br label %ppad
+
+ppad:
+ br label %bb12
+
+Unwind:
+ %eh_ptr15 = load i8** %eh_exception
+ call arm_apcscc void @_Unwind_SjLj_Resume(i8* %eh_ptr15)
+ unreachable
+}
+
+declare arm_apcscc void @func2()
+
+declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
+
+declare arm_apcscc void @func3()
+
+declare arm_apcscc i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll
index cc718399ea96..bb7853e66ef4 100644
--- a/test/CodeGen/ARM/arguments.ll
+++ b/test/CodeGen/ARM/arguments.ll
@@ -1,11 +1,43 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnueabi -mattr=+vfp2 | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+vfp2 | FileCheck %s -check-prefix=DARWIN
-define i32 @f(i32 %a, i64 %b) {
+define i32 @f1(i32 %a, i64 %b) {
+; ELF: f1:
; ELF: mov r0, r2
+; DARWIN: f1:
; DARWIN: mov r0, r1
- %tmp = call i32 @g(i64 %b)
+ %tmp = call i32 @g1(i64 %b)
ret i32 %tmp
}
-declare i32 @g(i64)
+; test that allocating the double to r2/r3 makes r1 unavailable on gnueabi.
+define i32 @f2() nounwind optsize {
+; ELF: f2:
+; ELF: mov r0, #128
+; ELF: str r0, [sp]
+; DARWIN: f2:
+; DARWIN: mov r3, #128
+entry:
+ %0 = tail call i32 (i32, ...)* @g2(i32 5, double 1.600000e+01, i32 128) nounwind optsize ; <i32> [#uses=1]
+ %not. = icmp ne i32 %0, 128 ; <i1> [#uses=1]
+ %.0 = zext i1 %not. to i32 ; <i32> [#uses=1]
+ ret i32 %.0
+}
+
+; test that on gnueabi a 64 bit value at this position will cause r3 to go
+; unused and the value stored in [sp]
+; ELF: f3:
+; ELF: ldr r0, [sp]
+; ELF-NEXT: mov pc, lr
+; DARWIN: f3:
+; DARWIN: mov r0, r3
+; DARWIN-NEXT: mov pc, lr
+define i32 @f3(i32 %i, i32 %j, i32 %k, i64 %l, ...) {
+entry:
+ %0 = trunc i64 %l to i32
+ ret i32 %0
+}
+
+declare i32 @g1(i64)
+
+declare i32 @g2(i32 %i, ...)
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
new file mode 100644
index 000000000000..59e2b43a9172
--- /dev/null
+++ b/test/CodeGen/ARM/bfi.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s
+
+%struct.F = type { [3 x i8], i8 }
+
+@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1]
+
+define void @f1([1 x i32] %f.coerce0) nounwind {
+entry:
+; CHECK: f1
+; CHECK: mov r2, #10
+; CHECK: bfi r1, r2, #22, #4
+ %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+ %1 = and i32 %0, -62914561 ; <i32> [#uses=1]
+ %2 = or i32 %1, 41943040 ; <i32> [#uses=1]
+ store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
+ ret void
+}
+
+define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize {
+entry:
+; CHECK: f2
+; CHECK: mov r1, r1, lsr #7
+; CHECK: bfi r0, r1, #7, #16
+ %and = and i32 %A, -8388481 ; <i32> [#uses=1]
+ %and2 = and i32 %B, 8388480 ; <i32> [#uses=1]
+ %or = or i32 %and2, %and ; <i32> [#uses=1]
+ ret i32 %or
+}
+
+define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
+entry:
+; CHECK: f3
+; CHECK: mov r2, r0, lsr #7
+; CHECK: mov r0, r1
+; CHECK: bfi r0, r2, #7, #16
+ %and = and i32 %A, 8388480 ; <i32> [#uses=1]
+ %and2 = and i32 %B, -8388481 ; <i32> [#uses=1]
+ %or = or i32 %and2, %and ; <i32> [#uses=1]
+ ret i32 %or
+}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index f1269d5bd2be..db5afe3f56cb 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
+; XFAIL: *
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
new file mode 100644
index 000000000000..25c556889fc4
--- /dev/null
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; PHI elimination shouldn't break backedge.
+; rdar://8263994
+
+%struct.list_data_s = type { i16, i16 }
+%struct.list_head = type { %struct.list_head*, %struct.list_data_s* }
+
+define arm_apcscc %struct.list_head* @t(%struct.list_head* %list) nounwind {
+entry:
+ %0 = icmp eq %struct.list_head* %list, null
+ br i1 %0, label %bb2, label %bb
+
+bb:
+; CHECK: LBB0_2:
+; CHECK: bne LBB0_2
+; CHECK-NOT: b LBB0_2
+; CHECK: bx lr
+ %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
+ %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
+ %1 = getelementptr inbounds %struct.list_head* %list_addr.05, i32 0, i32 0
+ %2 = load %struct.list_head** %1, align 4
+ store %struct.list_head* %next.04, %struct.list_head** %1, align 4
+ %3 = icmp eq %struct.list_head* %2, null
+ br i1 %3, label %bb2, label %bb
+
+bb2:
+ %next.0.lcssa = phi %struct.list_head* [ null, %entry ], [ %list_addr.05, %bb ]
+ ret %struct.list_head* %next.0.lcssa
+}
diff --git a/test/CodeGen/ARM/div.ll b/test/CodeGen/ARM/div.ll
index d833afa55583..448b437ddf46 100644
--- a/test/CodeGen/ARM/div.ll
+++ b/test/CodeGen/ARM/div.ll
@@ -1,13 +1,9 @@
; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECK-ARM
-; RUN: llc < %s -march=arm -mcpu=cortex-m3 \
-; RUN: | FileCheck %s -check-prefix=CHECK-ARMV7M
define i32 @f1(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f1
; CHECK-ARM: __divsi3
-; CHECK-ARMV7M: f1
-; CHECK-ARMV7M: sdiv
%tmp1 = sdiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -16,8 +12,6 @@ define i32 @f2(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f2
; CHECK-ARM: __udivsi3
-; CHECK-ARMV7M: f2
-; CHECK-ARMV7M: udiv
%tmp1 = udiv i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -26,8 +20,6 @@ define i32 @f3(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f3
; CHECK-ARM: __modsi3
-; CHECK-ARMV7M: f3
-; CHECK-ARMV7M: sdiv
%tmp1 = srem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
@@ -36,8 +28,6 @@ define i32 @f4(i32 %a, i32 %b) {
entry:
; CHECK-ARM: f4
; CHECK-ARM: __umodsi3
-; CHECK-ARMV7M: f4
-; CHECK-ARMV7M: udiv
%tmp1 = urem i32 %a, %b ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
new file mode 100644
index 000000000000..3bee84d84de4
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=armv7-apple-darwin
+; RUN: llc < %s -O0 -arm-fast-isel -fast-isel-abort -mtriple=thumbv7-apple-darwin
+
+; Very basic fast-isel functionality.
+
+define i32 @add(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %a.addr = alloca i32, align 4
+ %b.addr = alloca i32, align 4
+ store i32 %a, i32* %a.addr
+ store i32 %b, i32* %b.addr
+ %tmp = load i32* %a.addr
+ %tmp1 = load i32* %b.addr
+ %add = add nsw i32 %tmp, %tmp1
+ ret i32 %add
+}
+
+define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind {
+entry:
+ %r = load i32* %p
+ %s = load i32* %q
+ %y = load i32** %z
+ br label %fast
+
+fast:
+ %t0 = add i32 %r, %s
+ %t1 = mul i32 %t0, %s
+ %t2 = sub i32 %t1, %s
+ %t3 = and i32 %t2, %s
+ %t4 = xor i32 %t3, 3
+ %t5 = xor i32 %t4, %s
+ %t6 = add i32 %t5, 2
+ %t7 = getelementptr i32* %y, i32 1
+ %t8 = getelementptr i32* %t7, i32 %t6
+ br label %exit
+
+exit:
+ ret i32* %t8
+}
diff --git a/test/CodeGen/ARM/fnmuls.ll b/test/CodeGen/ARM/fnmuls.ll
index efd87d2dcb89..3223885feda9 100644
--- a/test/CodeGen/ARM/fnmuls.ll
+++ b/test/CodeGen/ARM/fnmuls.ll
@@ -1,20 +1,18 @@
-; XFAIL: *
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s
-; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
-define float @test1(float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0
+define arm_aapcs_vfpcc float @test1(float %a, float %b) nounwind {
+; CHECK: vnmul.f32 s0, s0, s1
entry:
%0 = fmul float %a, %b
%1 = fsub float -0.0, %0
ret float %1
}
-define float @test2(float %a, float %b) nounwind {
-; CHECK: fnmscs s2, s1, s0
+define arm_aapcs_vfpcc float @test2(float %a, float %b) nounwind {
+; CHECK: vnmul.f32 s0, s0, s1
entry:
%0 = fmul float %a, %b
%1 = fmul float -1.0, %0
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 6875288304be..64350591b87f 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
; rdar://7461510
diff --git a/test/CodeGen/ARM/fpowi.ll b/test/CodeGen/ARM/fpowi.ll
index 7f9d62a9e945..561463720c80 100644
--- a/test/CodeGen/ARM/fpowi.ll
+++ b/test/CodeGen/ARM/fpowi.ll
@@ -3,7 +3,7 @@
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-linux-gnueabi"
+target triple = "arm-unknown-linux-gnueabi"
define double @_ZSt3powdi(double %__x, i32 %__i) {
entry:
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 688b7bc312c7..1ec4d15f6672 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -24,8 +24,7 @@ define i32 @f2(i64 %x, i64 %y) {
; CHECK: f2
; CHECK: mov r0, r0, lsr r2
; CHECK-NEXT: rsb r3, r2, #32
-; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: subs r2, r2, #32
; CHECK-NEXT: orr r0, r0, r1, lsl r3
; CHECK-NEXT: movge r0, r1, asr r2
%a = ashr i64 %x, %y
@@ -37,8 +36,7 @@ define i32 @f3(i64 %x, i64 %y) {
; CHECK: f3
; CHECK: mov r0, r0, lsr r2
; CHECK-NEXT: rsb r3, r2, #32
-; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: cmp r2, #0
+; CHECK-NEXT: subs r2, r2, #32
; CHECK-NEXT: orr r0, r0, r1, lsl r3
; CHECK-NEXT: movge r0, r1, lsr r2
%a = lshr i64 %x, %y
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 25cf1356d61c..866be423c2cb 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
; constant offset addressing, so that each of the following stores
; uses the same register.
-; CHECK: vstr.32 s0, [r9, #-128]
-; CHECK: vstr.32 s0, [r9, #-96]
-; CHECK: vstr.32 s0, [r9, #-64]
-; CHECK: vstr.32 s0, [r9, #-32]
-; CHECK: vstr.32 s0, [r9]
-; CHECK: vstr.32 s0, [r9, #32]
-; CHECK: vstr.32 s0, [r9, #64]
-; CHECK: vstr.32 s0, [r9, #96]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
+; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
@@ -628,8 +628,7 @@ bb24: ; preds = %bb23
; CHECK: @ %bb24
; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
-; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1
-; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0
+; CHECK-NEXT: sub{{.*}} [[REGISTER:(r[0-9]+)|(lr)]], #1
; CHECK-NEXT: bne.w
%92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1]
diff --git a/test/CodeGen/ARM/pack.ll b/test/CodeGen/ARM/pack.ll
index 1e2e7aa0c8ff..4905dc28cf48 100644
--- a/test/CodeGen/ARM/pack.ll
+++ b/test/CodeGen/ARM/pack.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=arm -mattr=+v6 | \
-; RUN: grep pkhbt | count 5
-; RUN: llc < %s -march=arm -mattr=+v6 | \
-; RUN: grep pkhtb | count 4
+; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s
+; CHECK: test1
+; CHECK: pkhbt r0, r0, r1, lsl #16
define i32 @test1(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1]
@@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test1a
+; CHECK: pkhbt r0, r0, r1, lsl #16
define i32 @test1a(i32 %X, i32 %Y) {
%tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1]
@@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test2
+; CHECK: pkhbt r0, r0, r1, lsl #12
define i32 @test2(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1]
@@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) {
ret i32 %tmp57
}
+; CHECK: test3
+; CHECK: pkhbt r0, r0, r1, lsl #18
define i32 @test3(i32 %X, i32 %Y) {
%tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1]
@@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test4
+; CHECK: pkhbt r0, r0, r1
define i32 @test4(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1]
@@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) {
ret i32 %tmp46
}
+; CHECK: test5
+; CHECK: pkhtb r0, r0, r1, asr #16
define i32 @test5(i32 %X, i32 %Y) {
%tmp17 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1]
@@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test5a
+; CHECK: pkhtb r0, r0, r1, asr #16
define i32 @test5a(i32 %X, i32 %Y) {
%tmp110 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1]
@@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test6
+; CHECK: pkhtb r0, r0, r1, asr #12
define i32 @test6(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1]
@@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) {
ret i32 %tmp59
}
+; CHECK: test7
+; CHECK: pkhtb r0, r0, r1, asr #18
define i32 @test7(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1]
@@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) {
%tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
ret i32 %tmp57
}
+
+; CHECK: test8
+; CHECK: pkhtb r0, r0, r1, asr #22
+define i32 @test8(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536
+ %tmp3 = lshr i32 %Y, 22
+ %tmp57 = or i32 %tmp3, %tmp1
+ ret i32 %tmp57
+}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 89b657797f2a..2e4f10d8a63d 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -23,21 +23,21 @@ entry:
%2 = getelementptr inbounds %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
%3 = load <4 x i32>* %2, align 16 ; <<4 x i32>> [#uses=1]
%4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1]
- %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1]
+ %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
%6 = bitcast <8 x i16> %5 to <2 x double> ; <<2 x double>> [#uses=2]
%7 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1]
%8 = bitcast double %7 to <4 x i16> ; <<4 x i16>> [#uses=1]
- %9 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %8) ; <<4 x i32>> [#uses=1]
+ %9 = sext <4 x i16> %8 to <4 x i32> ; <<4 x i32>> [#uses=1]
%10 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1]
%11 = bitcast double %10 to <4 x i16> ; <<4 x i16>> [#uses=1]
- %12 = tail call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %11) ; <<4 x i32>> [#uses=1]
+ %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1]
%13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1]
%14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1]
%15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
%16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
%17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
%18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17)
+ tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1)
ret void
}
@@ -45,10 +45,10 @@ define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr,
entry:
; CHECK: t2:
; CHECK: vld1.16
-; CHECK: vmul.i16
; CHECK-NOT: vmov
; CHECK: vld1.16
; CHECK: vmul.i16
+; CHECK: vmul.i16
; CHECK-NOT: vmov
; CHECK: vst1.16
; CHECK: vst1.16
@@ -57,17 +57,17 @@ entry:
%2 = getelementptr inbounds %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
%3 = load <8 x i16>* %2, align 16 ; <<8 x i16>> [#uses=1]
%4 = bitcast i16* %i_ptr to i8* ; <i8*> [#uses=1]
- %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4) ; <<8 x i16>> [#uses=1]
+ %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
%6 = getelementptr inbounds i16* %i_ptr, i32 8 ; <i16*> [#uses=1]
%7 = bitcast i16* %6 to i8* ; <i8*> [#uses=1]
- %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7) ; <<8 x i16>> [#uses=1]
+ %8 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %7, i32 1) ; <<8 x i16>> [#uses=1]
%9 = mul <8 x i16> %1, %5 ; <<8 x i16>> [#uses=1]
%10 = mul <8 x i16> %3, %8 ; <<8 x i16>> [#uses=1]
%11 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9)
+ tail call void @llvm.arm.neon.vst1.v8i16(i8* %11, <8 x i16> %9, i32 1)
%12 = getelementptr inbounds i16* %o_ptr, i32 8 ; <i16*> [#uses=1]
%13 = bitcast i16* %12 to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10)
+ tail call void @llvm.arm.neon.vst1.v8i16(i8* %13, <8 x i16> %10, i32 1)
ret void
}
@@ -77,14 +77,14 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
; CHECK: vmul.i8
; CHECK-NOT: vmov
; CHECK: vst3.8
- %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A) ; <%struct.__neon_int8x8x3_t> [#uses=2]
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2]
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 ; <<8 x i8>> [#uses=1]
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 ; <<8 x i8>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 1 ; <<8 x i8>> [#uses=1]
%tmp5 = sub <8 x i8> %tmp3, %tmp4
%tmp6 = add <8 x i8> %tmp2, %tmp3 ; <<8 x i8>> [#uses=1]
%tmp7 = mul <8 x i8> %tmp4, %tmp2
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
+ tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7, i32 1)
ret <8 x i8> %tmp4
}
@@ -97,10 +97,10 @@ entry:
; CHECK-NOT: vmov
; CHECK: bne
%tmp1 = bitcast i32* %in to i8* ; <i8*> [#uses=1]
- %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+ %tmp2 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp1, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
%tmp3 = getelementptr inbounds i32* %in, i32 8 ; <i32*> [#uses=1]
%tmp4 = bitcast i32* %tmp3 to i8* ; <i8*> [#uses=1]
- %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+ %tmp5 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp4, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
%tmp8 = bitcast i32* %out to i8* ; <i8*> [#uses=1]
br i1 undef, label %return1, label %return2
@@ -116,7 +116,7 @@ return1:
%tmp39 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
%tmp6 = add <4 x i32> %tmp52, %tmp ; <<4 x i32>> [#uses=1]
%tmp7 = add <4 x i32> %tmp57, %tmp39 ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7)
+ tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp6, <4 x i32> %tmp7, i32 1)
ret void
return2:
@@ -128,7 +128,7 @@ return2:
%tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
%tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1]
%tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101)
+ tail call void @llvm.arm.neon.vst2.v4i32(i8* %tmp8, <4 x i32> %tmp102, <4 x i32> %tmp101, i32 1)
call void @llvm.trap()
unreachable
}
@@ -143,7 +143,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
; CHECK: vadd.i16
%tmp0 = bitcast i16* %A to i8* ; <i8*> [#uses=1]
%tmp1 = load <8 x i16>* %B ; <<8 x i16>> [#uses=2]
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2]
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1]
%tmp5 = add <8 x i16> %tmp3, %tmp4 ; <<8 x i16>> [#uses=1]
@@ -156,7 +156,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: vmov d1, d0
; CHECK-NEXT: vld2.8 {d0[1], d1[1]}
%tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
- %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1]
%tmp5 = add <8 x i8> %tmp3, %tmp4 ; <<8 x i8>> [#uses=1]
@@ -174,14 +174,14 @@ entry:
; CHECK: vuzp.32 q0, q1
; CHECK: vst1.32
%0 = bitcast i32* %iptr to i8* ; <i8*> [#uses=2]
- %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0) ; <%struct.__neon_int32x4x2_t> [#uses=2]
+ %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2]
%tmp57 = extractvalue %struct.__neon_int32x4x2_t %1, 0 ; <<4 x i32>> [#uses=1]
%tmp60 = extractvalue %struct.__neon_int32x4x2_t %1, 1 ; <<4 x i32>> [#uses=1]
%2 = bitcast i32* %optr to i8* ; <i8*> [#uses=2]
- tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60)
- %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0) ; <<4 x i32>> [#uses=1]
+ tail call void @llvm.arm.neon.vst2.v4i32(i8* %2, <4 x i32> %tmp57, <4 x i32> %tmp60, i32 1)
+ %3 = tail call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %0, i32 1) ; <<4 x i32>> [#uses=1]
%4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2> ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4)
+ tail call void @llvm.arm.neon.vst1.v4i32(i8* %2, <4 x i32> %4, i32 1)
ret void
}
@@ -304,44 +304,43 @@ bb14: ; preds = %bb6
; This test crashes the coalescer because live variables were not updated properly.
define <8 x i8> @t11(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind {
- %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1]
- %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6) ; <%struct.__neon_int8x8x3_t> [#uses=1]
+ %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1]
%tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1]
%tmp2bd = add <8 x i8> zeroinitializer, %tmp2d ; <<8 x i8>> [#uses=1]
%tmp2abcd = mul <8 x i8> zeroinitializer, %tmp2bd ; <<8 x i8>> [#uses=1]
%tmp2ef = sub <8 x i8> zeroinitializer, %tmp2f ; <<8 x i8>> [#uses=1]
%tmp2efgh = mul <8 x i8> %tmp2ef, undef ; <<8 x i8>> [#uses=2]
- call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh)
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp2efgh, i32 1)
%tmp2 = sub <8 x i8> %tmp2efgh, %tmp2abcd ; <<8 x i8>> [#uses=1]
%tmp7 = mul <8 x i8> undef, %tmp2 ; <<8 x i8>> [#uses=1]
- tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7)
+ tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> undef, <8 x i8> undef, <8 x i8> %tmp7, i32 1)
ret <8 x i8> undef
}
-declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
-
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
-declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32)
+nounwind
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
-declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
-declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll
index 1e780e6a9097..6b86f1a9f368 100644
--- a/test/CodeGen/ARM/remat.ll
+++ b/test/CodeGen/ARM/remat.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -stats -info-output-file - | grep "Number of re-materialization"
+; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -o /dev/null -stats -info-output-file - | grep "Number of re-materialization"
define i32 @main(i32 %argc, i8** nocapture %argv, double %d1, double %d2) nounwind {
entry:
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 29c55c6bd975..7413bed5c5b1 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=arm | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
define i32 @f1(i32 %a.s) {
;CHECK: f1:
@@ -65,3 +66,27 @@ define double @f7(double %a, double %b) {
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
ret double %tmp1
}
+
+; <rdar://problem/7260094>
+;
+; We used to generate really horrible code for this function. The main cause was
+; a lack of a custom lowering routine for an ISD::SELECT. This would result in
+; two "it" blocks in the code: one for the "icmp" and another to move the index
+; into the constant pool based on the value of the "icmp". If we have one "it"
+; block generated, odds are good that we have close to the ideal code for this:
+;
+; CHECK-NEON: _f8:
+; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0
+; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]]
+; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
+; CHECK-NEON-NEXT: it eq
+; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4
+; CHECK-NEON-NEXT: ldr
+; CHECK-NEON: bx
+
+define arm_apcscc float @f8(i32 %a) nounwind {
+ %tmp = icmp eq i32 %a, 1123
+ %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000
+ ret float %tmp1
+}
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index 792ef79982b7..ae1ba2f73825 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -7,7 +7,7 @@
%quux = type { i32 (...)**, %baz*, i32 }
%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
@@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) {
; CHECK: vst1.64 {{.*}}sp, :128
; CHECK: vld1.64 {{.*}}sp, :128
entry:
- %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4
- %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 0.000000e+00, float* undef, align 4
- %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
%val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
diff --git a/test/CodeGen/ARM/t2-imm.ll b/test/CodeGen/ARM/t2-imm.ll
index 848a4dfed054..8b4145914e7c 100644
--- a/test/CodeGen/ARM/t2-imm.ll
+++ b/test/CodeGen/ARM/t2-imm.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @f6(i32 %a) {
; CHECK:f6
-; CHECK: movw r0, #:lower16:65537123
-; CHECK: movt r0, #:upper16:65537123
+; CHECK: movw r0, #1123
+; CHECK: movt r0, #1000
%tmp = add i32 0, 65537123
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/vaba.ll b/test/CodeGen/ARM/vaba.ll
index e2dca4647bce..4fe1c434799d 100644
--- a/test/CodeGen/ARM/vaba.ll
+++ b/test/CodeGen/ARM/vaba.ll
@@ -6,8 +6,9 @@ define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i8> %tmp4
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
+ %tmp5 = add <8 x i8> %tmp1, %tmp4
+ ret <8 x i8> %tmp5
}
define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -16,8 +17,9 @@ define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i16> %tmp4
+ %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
+ %tmp5 = add <4 x i16> %tmp1, %tmp4
+ ret <4 x i16> %tmp5
}
define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -26,8 +28,9 @@ define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i32> %tmp4
+ %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
+ %tmp5 = add <2 x i32> %tmp1, %tmp4
+ ret <2 x i32> %tmp5
}
define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
@@ -36,8 +39,9 @@ define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i8> %tmp4
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
+ %tmp5 = add <8 x i8> %tmp1, %tmp4
+ ret <8 x i8> %tmp5
}
define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -46,8 +50,9 @@ define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i16> %tmp4
+ %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
+ %tmp5 = add <4 x i16> %tmp1, %tmp4
+ ret <4 x i16> %tmp5
}
define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -56,8 +61,9 @@ define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i32> %tmp4
+ %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
+ %tmp5 = add <2 x i32> %tmp1, %tmp4
+ ret <2 x i32> %tmp5
}
define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
@@ -66,8 +72,9 @@ define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
- %tmp4 = call <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3)
- ret <16 x i8> %tmp4
+ %tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
+ %tmp5 = add <16 x i8> %tmp1, %tmp4
+ ret <16 x i8> %tmp5
}
define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
@@ -76,8 +83,9 @@ define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
+ %tmp5 = add <8 x i16> %tmp1, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
@@ -86,8 +94,9 @@ define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
+ %tmp5 = add <4 x i32> %tmp1, %tmp4
+ ret <4 x i32> %tmp5
}
define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
@@ -96,8 +105,9 @@ define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
%tmp1 = load <16 x i8>* %A
%tmp2 = load <16 x i8>* %B
%tmp3 = load <16 x i8>* %C
- %tmp4 = call <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> %tmp3)
- ret <16 x i8> %tmp4
+ %tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
+ %tmp5 = add <16 x i8> %tmp1, %tmp4
+ ret <16 x i8> %tmp5
}
define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
@@ -106,8 +116,9 @@ define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i16>* %B
%tmp3 = load <8 x i16>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
+ %tmp5 = add <8 x i16> %tmp1, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
@@ -116,25 +127,26 @@ define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i32>* %B
%tmp3 = load <4 x i32>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
+ %tmp5 = add <4 x i32> %tmp1, %tmp4
+ ret <4 x i32> %tmp5
}
-declare <8 x i8> @llvm.arm.neon.vabas.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vabas.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vabas.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <8 x i8> @llvm.arm.neon.vabau.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vabau.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vabau.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
+declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vabas.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vabas.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabas.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare <16 x i8> @llvm.arm.neon.vabau.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.arm.neon.vabau.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabau.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: vabals8:
@@ -142,8 +154,10 @@ define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
+ %tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
+ %tmp6 = add <8 x i16> %tmp1, %tmp5
+ ret <8 x i16> %tmp6
}
define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -152,8 +166,10 @@ define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
+ %tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
+ %tmp6 = add <4 x i32> %tmp1, %tmp5
+ ret <4 x i32> %tmp6
}
define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -162,8 +178,10 @@ define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
+ %tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
+ %tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
+ %tmp6 = add <2 x i64> %tmp1, %tmp5
+ ret <2 x i64> %tmp6
}
define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
@@ -172,8 +190,10 @@ define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
+ %tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
+ %tmp6 = add <8 x i16> %tmp1, %tmp5
+ ret <8 x i16> %tmp6
}
define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -182,8 +202,10 @@ define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
+ %tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
+ %tmp6 = add <4 x i32> %tmp1, %tmp5
+ ret <4 x i32> %tmp6
}
define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -192,14 +214,8 @@ define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
+ %tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
+ %tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
+ %tmp6 = add <2 x i64> %tmp1, %tmp5
+ ret <2 x i64> %tmp6
}
-
-declare <8 x i16> @llvm.arm.neon.vabals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vabalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index 2b4539361459..9ec734fa7641 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -151,8 +151,9 @@ define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vabdl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -160,8 +161,9 @@ define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vabdl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -169,8 +171,9 @@ define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vabdl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
}
define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
@@ -178,8 +181,9 @@ define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vabdl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -187,8 +191,9 @@ define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vabdl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
+ ret <4 x i32> %tmp4
}
define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -196,14 +201,7 @@ define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vabdl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
+ ret <2 x i64> %tmp4
}
-
-declare <8 x i16> @llvm.arm.neon.vabdls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabdls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabdls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vabdlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vabdlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vabdlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vadd.ll b/test/CodeGen/ARM/vadd.ll
index 9bb8bf561045..a830e968ff78 100644
--- a/test/CodeGen/ARM/vadd.ll
+++ b/test/CodeGen/ARM/vadd.ll
@@ -157,8 +157,10 @@ define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -166,8 +168,10 @@ define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
}
define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -175,8 +179,10 @@ define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
}
define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
@@ -184,8 +190,10 @@ define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -193,8 +201,10 @@ define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
}
define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -202,25 +212,20 @@ define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
}
-declare <8 x i16> @llvm.arm.neon.vaddls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vaddlu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddlu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddlu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddws8:
;CHECK: vaddw.s8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
@@ -228,8 +233,9 @@ define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddw.s16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
}
define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
@@ -237,8 +243,9 @@ define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddw.s32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
}
define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
@@ -246,8 +253,9 @@ define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK: vaddw.u8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = add <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
@@ -255,8 +263,9 @@ define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK: vaddw.u16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = add <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
}
define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
@@ -264,14 +273,7 @@ define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK: vaddw.u32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = add <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
}
-
-declare <8 x i16> @llvm.arm.neon.vaddws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vaddwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vaddwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vaddwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index c11a67c6c434..e460a84f6265 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -54,3 +54,23 @@ define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
ret <4 x i32> %tmp3
}
+; Undef shuffle indices should not prevent matching to VEXT:
+
+define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: test_vextd_undef:
+;CHECK: vext
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x i8> %tmp3
+}
+
+define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: test_vextRq_undef:
+;CHECK: vext
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
+ ret <16 x i8> %tmp3
+}
+
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index c61ea8c9a789..2488e8a0d0cc 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -3,7 +3,7 @@
define <8 x i8> @vld1i8(i8* %A) nounwind {
;CHECK: vld1i8:
;CHECK: vld1.8
- %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
+ %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 1)
ret <8 x i8> %tmp1
}
@@ -11,7 +11,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind {
;CHECK: vld1i16:
;CHECK: vld1.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0)
+ %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
ret <4 x i16> %tmp1
}
@@ -19,7 +19,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind {
;CHECK: vld1i32:
;CHECK: vld1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0)
+ %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
ret <2 x i32> %tmp1
}
@@ -27,7 +27,7 @@ define <2 x float> @vld1f(float* %A) nounwind {
;CHECK: vld1f:
;CHECK: vld1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0)
+ %tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(i8* %tmp0, i32 1)
ret <2 x float> %tmp1
}
@@ -35,14 +35,14 @@ define <1 x i64> @vld1i64(i64* %A) nounwind {
;CHECK: vld1i64:
;CHECK: vld1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0)
+ %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1)
ret <1 x i64> %tmp1
}
define <16 x i8> @vld1Qi8(i8* %A) nounwind {
;CHECK: vld1Qi8:
;CHECK: vld1.8
- %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
+ %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 1)
ret <16 x i8> %tmp1
}
@@ -50,7 +50,7 @@ define <8 x i16> @vld1Qi16(i16* %A) nounwind {
;CHECK: vld1Qi16:
;CHECK: vld1.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0)
+ %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 1)
ret <8 x i16> %tmp1
}
@@ -58,7 +58,7 @@ define <4 x i32> @vld1Qi32(i32* %A) nounwind {
;CHECK: vld1Qi32:
;CHECK: vld1.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0)
+ %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1)
ret <4 x i32> %tmp1
}
@@ -66,7 +66,7 @@ define <4 x float> @vld1Qf(float* %A) nounwind {
;CHECK: vld1Qf:
;CHECK: vld1.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0)
+ %tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %tmp0, i32 1)
ret <4 x float> %tmp1
}
@@ -74,18 +74,31 @@ define <2 x i64> @vld1Qi64(i64* %A) nounwind {
;CHECK: vld1Qi64:
;CHECK: vld1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0)
+ %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1)
ret <2 x i64> %tmp1
}
-declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
-declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
-declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
-declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
-declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
-declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
-declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
+
+; Radar 8355607
+; Do not crash if the vld1 result is not used.
+define void @unused_vld1_result() {
+entry:
+;CHECK: unused_vld1_result
+;CHECK: vld1.32
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1)
+ call void @llvm.trap()
+ unreachable
+}
+
+declare void @llvm.trap() nounwind
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 0838636ce742..811f6e6db96f 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -14,7 +14,7 @@
define <8 x i8> @vld2i8(i8* %A) nounwind {
;CHECK: vld2i8:
;CHECK: vld2.8
- %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A)
+ %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 1)
%tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -25,7 +25,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind {
;CHECK: vld2i16:
;CHECK: vld2.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -36,7 +36,7 @@ define <2 x i32> @vld2i32(i32* %A) nounwind {
;CHECK: vld2i32:
;CHECK: vld2.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -47,7 +47,7 @@ define <2 x float> @vld2f(float* %A) nounwind {
;CHECK: vld2f:
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
%tmp4 = fadd <2 x float> %tmp2, %tmp3
@@ -58,7 +58,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
;CHECK: vld2i64:
;CHECK: vld1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -68,7 +68,7 @@ define <1 x i64> @vld2i64(i64* %A) nounwind {
define <16 x i8> @vld2Qi8(i8* %A) nounwind {
;CHECK: vld2Qi8:
;CHECK: vld2.8
- %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A)
+ %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 1)
%tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -79,7 +79,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
;CHECK: vld2Qi16:
;CHECK: vld2.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -90,7 +90,7 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind {
;CHECK: vld2Qi32:
;CHECK: vld2.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -101,20 +101,20 @@ define <4 x float> @vld2Qf(float* %A) nounwind {
;CHECK: vld2Qf:
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
%tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
-declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
-declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
-declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
-declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
-declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*) nounwind readonly
-declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*) nounwind readonly
-declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*) nounwind readonly
-declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*) nounwind readonly
+declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 65a24486bc62..92538c34f5b8 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -14,7 +14,7 @@
define <8 x i8> @vld3i8(i8* %A) nounwind {
;CHECK: vld3i8:
;CHECK: vld3.8
- %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A)
+ %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 1)
%tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -25,7 +25,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
;CHECK: vld3i16:
;CHECK: vld3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -36,7 +36,7 @@ define <2 x i32> @vld3i32(i32* %A) nounwind {
;CHECK: vld3i32:
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -47,7 +47,7 @@ define <2 x float> @vld3f(float* %A) nounwind {
;CHECK: vld3f:
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp1, 2
%tmp4 = fadd <2 x float> %tmp2, %tmp3
@@ -58,7 +58,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
;CHECK: vld3i64:
;CHECK: vld1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -69,7 +69,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind {
;CHECK: vld3Qi8:
;CHECK: vld3.8
;CHECK: vld3.8
- %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A)
+ %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 1)
%tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -81,7 +81,7 @@ define <8 x i16> @vld3Qi16(i16* %A) nounwind {
;CHECK: vld3.16
;CHECK: vld3.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -93,7 +93,7 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
;CHECK: vld3.32
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -105,20 +105,20 @@ define <4 x float> @vld3Qf(float* %A) nounwind {
;CHECK: vld3.32
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp1, 2
%tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
-declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
-declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
-declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*) nounwind readonly
-declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*) nounwind readonly
+declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index e0b870638a18..d1bf957ebadc 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -14,7 +14,7 @@
define <8 x i8> @vld4i8(i8* %A) nounwind {
;CHECK: vld4i8:
;CHECK: vld4.8
- %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A)
+ %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 1)
%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
%tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -25,7 +25,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
;CHECK: vld4i16:
;CHECK: vld4.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
%tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -36,7 +36,7 @@ define <2 x i32> @vld4i32(i32* %A) nounwind {
;CHECK: vld4i32:
;CHECK: vld4.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
%tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -47,7 +47,7 @@ define <2 x float> @vld4f(float* %A) nounwind {
;CHECK: vld4f:
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp1, 2
%tmp4 = fadd <2 x float> %tmp2, %tmp3
@@ -58,7 +58,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
;CHECK: vld4i64:
;CHECK: vld1.64
%tmp0 = bitcast i64* %A to i8*
- %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
%tmp4 = add <1 x i64> %tmp2, %tmp3
@@ -69,7 +69,7 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
;CHECK: vld4Qi8:
;CHECK: vld4.8
;CHECK: vld4.8
- %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A)
+ %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 1)
%tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
%tmp4 = add <16 x i8> %tmp2, %tmp3
@@ -81,7 +81,7 @@ define <8 x i16> @vld4Qi16(i16* %A) nounwind {
;CHECK: vld4.16
;CHECK: vld4.16
%tmp0 = bitcast i16* %A to i8*
- %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
%tmp4 = add <8 x i16> %tmp2, %tmp3
@@ -93,7 +93,7 @@ define <4 x i32> @vld4Qi32(i32* %A) nounwind {
;CHECK: vld4.32
;CHECK: vld4.32
%tmp0 = bitcast i32* %A to i8*
- %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp1, 2
%tmp4 = add <4 x i32> %tmp2, %tmp3
@@ -105,20 +105,20 @@ define <4 x float> @vld4Qf(float* %A) nounwind {
;CHECK: vld4.32
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
- %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0)
+ %tmp1 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8* %tmp0, i32 1)
%tmp2 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 0
%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp1, 2
%tmp4 = fadd <4 x float> %tmp2, %tmp3
ret <4 x float> %tmp4
}
-declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
-declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
-declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*) nounwind readonly
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly
+declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly
-declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*) nounwind readonly
-declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*) nounwind readonly
+declare %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index b32c59019f4c..31ee64fa598f 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -13,7 +13,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld2lanei8:
;CHECK: vld2.8
%tmp1 = load <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
%tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -25,7 +25,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld2.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
%tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -37,7 +37,7 @@ define <2 x i32> @vld2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
%tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -49,7 +49,7 @@ define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
%tmp5 = fadd <2 x float> %tmp3, %tmp4
@@ -61,7 +61,7 @@ define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld2.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -73,7 +73,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -85,21 +85,21 @@ define <4 x float> @vld2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
-declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind readonly
-declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind readonly
-declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind readonly
-declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind readonly
+declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind readonly
-declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind readonly
-declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind readonly
+declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
@@ -114,7 +114,7 @@ define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld3lanei8:
;CHECK: vld3.8
%tmp1 = load <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
@@ -128,7 +128,7 @@ define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2
@@ -142,7 +142,7 @@ define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2
@@ -156,7 +156,7 @@ define <2 x float> @vld3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 2
@@ -170,7 +170,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2
@@ -184,7 +184,7 @@ define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3)
+ %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2
@@ -198,7 +198,7 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 2
@@ -207,14 +207,14 @@ define <4 x float> @vld3laneQf(float* %A, <4 x float>* %B) nounwind {
ret <4 x float> %tmp7
}
-declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
-declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
-declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
-declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
-declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
-declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
+declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
@@ -229,7 +229,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vld4lanei8:
;CHECK: vld4.8
%tmp1 = load <8 x i8>* %B
- %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
@@ -245,7 +245,7 @@ define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vld4.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2
@@ -261,7 +261,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld4.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2
@@ -277,7 +277,7 @@ define <2 x float> @vld4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 2
@@ -293,7 +293,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vld4.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2
@@ -309,7 +309,7 @@ define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vld4.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2
@@ -325,7 +325,7 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ %tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
%tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
%tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
%tmp5 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 2
@@ -336,11 +336,11 @@ define <4 x float> @vld4laneQf(float* %A, <4 x float>* %B) nounwind {
ret <4 x float> %tmp9
}
-declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind readonly
-declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind readonly
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind readonly
-declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind readonly
+declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
+declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly
-declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind readonly
-declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind readonly
-declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind readonly
+declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly
+declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly
+declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly
diff --git a/test/CodeGen/ARM/vmla.ll b/test/CodeGen/ARM/vmla.ll
index 77cf10ad3e68..9c6b210be797 100644
--- a/test/CodeGen/ARM/vmla.ll
+++ b/test/CodeGen/ARM/vmla.ll
@@ -94,8 +94,11 @@ define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
+ %tmp6 = mul <8 x i16> %tmp4, %tmp5
+ %tmp7 = add <8 x i16> %tmp1, %tmp6
+ ret <8 x i16> %tmp7
}
define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -104,8 +107,11 @@ define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
+ %tmp6 = mul <4 x i32> %tmp4, %tmp5
+ %tmp7 = add <4 x i32> %tmp1, %tmp6
+ ret <4 x i32> %tmp7
}
define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -114,8 +120,11 @@ define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
+ %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
+ %tmp6 = mul <2 x i64> %tmp4, %tmp5
+ %tmp7 = add <2 x i64> %tmp1, %tmp6
+ ret <2 x i64> %tmp7
}
define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
@@ -124,8 +133,11 @@ define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
+ %tmp6 = mul <8 x i16> %tmp4, %tmp5
+ %tmp7 = add <8 x i16> %tmp1, %tmp6
+ ret <8 x i16> %tmp7
}
define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -134,8 +146,11 @@ define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
+ %tmp6 = mul <4 x i32> %tmp4, %tmp5
+ %tmp7 = add <4 x i32> %tmp1, %tmp6
+ ret <4 x i32> %tmp7
}
define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -144,8 +159,11 @@ define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
+ %tmp6 = mul <2 x i64> %tmp4, %tmp5
+ %tmp7 = add <2 x i64> %tmp1, %tmp6
+ ret <2 x i64> %tmp7
}
define arm_aapcs_vfpcc <4 x i32> @test_vmlal_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
@@ -153,8 +171,11 @@ entry:
; CHECK: test_vmlal_lanes16
; CHECK: vmlal.s16 q0, d2, d3[1]
%0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
+ %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32>
+ %2 = sext <4 x i16> %0 to <4 x i32>
+ %3 = mul <4 x i32> %1, %2
+ %4 = add <4 x i32> %arg0_int32x4_t, %3
+ ret <4 x i32> %4
}
define arm_aapcs_vfpcc <2 x i64> @test_vmlal_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
@@ -162,8 +183,11 @@ entry:
; CHECK: test_vmlal_lanes32
; CHECK: vmlal.s32 q0, d2, d3[1]
%0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
+ %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64>
+ %2 = sext <2 x i32> %0 to <2 x i64>
+ %3 = mul <2 x i64> %1, %2
+ %4 = add <2 x i64> %arg0_int64x2_t, %3
+ ret <2 x i64> %4
}
define arm_aapcs_vfpcc <4 x i32> @test_vmlal_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
@@ -171,8 +195,11 @@ entry:
; CHECK: test_vmlal_laneu16
; CHECK: vmlal.u16 q0, d2, d3[1]
%0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
+ %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32>
+ %2 = zext <4 x i16> %0 to <4 x i32>
+ %3 = mul <4 x i32> %1, %2
+ %4 = add <4 x i32> %arg0_uint32x4_t, %3
+ ret <4 x i32> %4
}
define arm_aapcs_vfpcc <2 x i64> @test_vmlal_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
@@ -180,14 +207,9 @@ entry:
; CHECK: test_vmlal_laneu32
; CHECK: vmlal.u32 q0, d2, d3[1]
%0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
+ %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64>
+ %2 = zext <2 x i32> %0 to <2 x i64>
+ %3 = mul <2 x i64> %1, %2
+ %4 = add <2 x i64> %arg0_uint64x2_t, %3
+ ret <2 x i64> %4
}
-
-declare <8 x i16> @llvm.arm.neon.vmlals.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlals.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlals.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmlalu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlalu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlalu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmls.ll b/test/CodeGen/ARM/vmls.ll
index 2b70a7878ced..65e7fe41bb3a 100644
--- a/test/CodeGen/ARM/vmls.ll
+++ b/test/CodeGen/ARM/vmls.ll
@@ -94,8 +94,11 @@ define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
+ %tmp6 = mul <8 x i16> %tmp4, %tmp5
+ %tmp7 = sub <8 x i16> %tmp1, %tmp6
+ ret <8 x i16> %tmp7
}
define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -104,8 +107,11 @@ define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
+ %tmp6 = mul <4 x i32> %tmp4, %tmp5
+ %tmp7 = sub <4 x i32> %tmp1, %tmp6
+ ret <4 x i32> %tmp7
}
define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -114,8 +120,11 @@ define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
+ %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
+ %tmp6 = mul <2 x i64> %tmp4, %tmp5
+ %tmp7 = sub <2 x i64> %tmp1, %tmp6
+ ret <2 x i64> %tmp7
}
define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
@@ -124,8 +133,11 @@ define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
%tmp3 = load <8 x i8>* %C
- %tmp4 = call <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
- ret <8 x i16> %tmp4
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
+ %tmp6 = mul <8 x i16> %tmp4, %tmp5
+ %tmp7 = sub <8 x i16> %tmp1, %tmp6
+ ret <8 x i16> %tmp7
}
define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
@@ -134,8 +146,11 @@ define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
%tmp3 = load <4 x i16>* %C
- %tmp4 = call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2, <4 x i16> %tmp3)
- ret <4 x i32> %tmp4
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
+ %tmp6 = mul <4 x i32> %tmp4, %tmp5
+ %tmp7 = sub <4 x i32> %tmp1, %tmp6
+ ret <4 x i32> %tmp7
}
define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
@@ -144,8 +159,11 @@ define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
%tmp3 = load <2 x i32>* %C
- %tmp4 = call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2, <2 x i32> %tmp3)
- ret <2 x i64> %tmp4
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
+ %tmp6 = mul <2 x i64> %tmp4, %tmp5
+ %tmp7 = sub <2 x i64> %tmp1, %tmp6
+ ret <2 x i64> %tmp7
}
define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_lanes16(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %arg2_int16x4_t) nounwind readnone {
@@ -153,8 +171,11 @@ entry:
; CHECK: test_vmlsl_lanes16
; CHECK: vmlsl.s16 q0, d2, d3[1]
%0 = shufflevector <4 x i16> %arg2_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32> %arg0_int32x4_t, <4 x i16> %arg1_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
+ %1 = sext <4 x i16> %arg1_int16x4_t to <4 x i32>
+ %2 = sext <4 x i16> %0 to <4 x i32>
+ %3 = mul <4 x i32> %1, %2
+ %4 = sub <4 x i32> %arg0_int32x4_t, %3
+ ret <4 x i32> %4
}
define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_lanes32(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %arg2_int32x2_t) nounwind readnone {
@@ -162,8 +183,11 @@ entry:
; CHECK: test_vmlsl_lanes32
; CHECK: vmlsl.s32 q0, d2, d3[1]
%0 = shufflevector <2 x i32> %arg2_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64> %arg0_int64x2_t, <2 x i32> %arg1_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
+ %1 = sext <2 x i32> %arg1_int32x2_t to <2 x i64>
+ %2 = sext <2 x i32> %0 to <2 x i64>
+ %3 = mul <2 x i64> %1, %2
+ %4 = sub <2 x i64> %arg0_int64x2_t, %3
+ ret <2 x i64> %4
}
define arm_aapcs_vfpcc <4 x i32> @test_vmlsl_laneu16(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %arg2_uint16x4_t) nounwind readnone {
@@ -171,8 +195,11 @@ entry:
; CHECK: test_vmlsl_laneu16
; CHECK: vmlsl.u16 q0, d2, d3[1]
%0 = shufflevector <4 x i16> %arg2_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32> %arg0_uint32x4_t, <4 x i16> %arg1_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
+ %1 = zext <4 x i16> %arg1_uint16x4_t to <4 x i32>
+ %2 = zext <4 x i16> %0 to <4 x i32>
+ %3 = mul <4 x i32> %1, %2
+ %4 = sub <4 x i32> %arg0_uint32x4_t, %3
+ ret <4 x i32> %4
}
define arm_aapcs_vfpcc <2 x i64> @test_vmlsl_laneu32(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %arg2_uint32x2_t) nounwind readnone {
@@ -180,14 +207,9 @@ entry:
; CHECK: test_vmlsl_laneu32
; CHECK: vmlsl.u32 q0, d2, d3[1]
%0 = shufflevector <2 x i32> %arg2_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64> %arg0_uint64x2_t, <2 x i32> %arg1_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
+ %1 = zext <2 x i32> %arg1_uint32x2_t to <2 x i64>
+ %2 = zext <2 x i32> %0 to <2 x i64>
+ %3 = mul <2 x i64> %1, %2
+ %4 = sub <2 x i64> %arg0_uint64x2_t, %3
+ ret <2 x i64> %4
}
-
-declare <8 x i16> @llvm.arm.neon.vmlsls.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlsls.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlsls.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmlslu.v8i16(<8 x i16>, <8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmlslu.v4i32(<4 x i32>, <4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmlslu.v2i64(<2 x i64>, <2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index 5e872ab6d0b2..8cd94576b0c2 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -192,7 +192,7 @@ define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
;CHECK: vmovls8:
;CHECK: vmovl.s8
%tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8> %tmp1)
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
ret <8 x i16> %tmp2
}
@@ -200,7 +200,7 @@ define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
;CHECK: vmovls16:
;CHECK: vmovl.s16
%tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16> %tmp1)
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -208,7 +208,7 @@ define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
;CHECK: vmovls32:
;CHECK: vmovl.s32
%tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32> %tmp1)
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
ret <2 x i64> %tmp2
}
@@ -216,7 +216,7 @@ define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
;CHECK: vmovlu8:
;CHECK: vmovl.u8
%tmp1 = load <8 x i8>* %A
- %tmp2 = call <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8> %tmp1)
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
ret <8 x i16> %tmp2
}
@@ -224,7 +224,7 @@ define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
;CHECK: vmovlu16:
;CHECK: vmovl.u16
%tmp1 = load <4 x i16>* %A
- %tmp2 = call <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16> %tmp1)
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
ret <4 x i32> %tmp2
}
@@ -232,23 +232,15 @@ define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
;CHECK: vmovlu32:
;CHECK: vmovl.u32
%tmp1 = load <2 x i32>* %A
- %tmp2 = call <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32> %tmp1)
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
ret <2 x i64> %tmp2
}
-declare <8 x i16> @llvm.arm.neon.vmovls.v8i16(<8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmovls.v4i32(<4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmovls.v2i64(<2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmovlu.v8i16(<8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmovlu.v4i32(<4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmovlu.v2i64(<2 x i32>) nounwind readnone
-
define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
;CHECK: vmovni16:
;CHECK: vmovn.i16
%tmp1 = load <8 x i16>* %A
- %tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
+ %tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
ret <8 x i8> %tmp2
}
@@ -256,7 +248,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
;CHECK: vmovni32:
;CHECK: vmovn.i32
%tmp1 = load <4 x i32>* %A
- %tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
+ %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
ret <4 x i16> %tmp2
}
@@ -264,14 +256,10 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
;CHECK: vmovni64:
;CHECK: vmovn.i64
%tmp1 = load <2 x i64>* %A
- %tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
+ %tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
ret <2 x i32> %tmp2
}
-declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
-
define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
;CHECK: vqmovns16:
;CHECK: vqmovn.s16
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 1d9168021279..5383425018f8 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -152,8 +152,10 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmull.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = mul <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -161,8 +163,10 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmull.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = mul <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
}
define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -170,8 +174,10 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmull.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = mul <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
}
define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
@@ -179,8 +185,10 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vmull.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = mul <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -188,8 +196,10 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vmull.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = mul <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
}
define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -197,8 +207,10 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vmull.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = mul <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
}
define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
@@ -215,8 +227,10 @@ entry:
; CHECK: test_vmull_lanes16
; CHECK: vmull.s16 q0, d0, d1[1]
%0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
+ %1 = sext <4 x i16> %arg0_int16x4_t to <4 x i32>
+ %2 = sext <4 x i16> %0 to <4 x i32>
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
}
define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone {
@@ -224,8 +238,10 @@ entry:
; CHECK: test_vmull_lanes32
; CHECK: vmull.s32 q0, d0, d1[1]
%0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
+ %1 = sext <2 x i32> %arg0_int32x2_t to <2 x i64>
+ %2 = sext <2 x i32> %0 to <2 x i64>
+ %3 = mul <2 x i64> %1, %2
+ ret <2 x i64> %3
}
define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone {
@@ -233,8 +249,10 @@ entry:
; CHECK: test_vmull_laneu16
; CHECK: vmull.u16 q0, d0, d1[1]
%0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; <<4 x i16>> [#uses=1]
- %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %1
+ %1 = zext <4 x i16> %arg0_uint16x4_t to <4 x i32>
+ %2 = zext <4 x i16> %0 to <4 x i32>
+ %3 = mul <4 x i32> %1, %2
+ ret <4 x i32> %3
}
define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone {
@@ -242,16 +260,10 @@ entry:
; CHECK: test_vmull_laneu32
; CHECK: vmull.u32 q0, d0, d1[1]
%0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> <i32 1, i32 1> ; <<2 x i32>> [#uses=1]
- %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %1
+ %1 = zext <2 x i32> %arg0_uint32x2_t to <2 x i64>
+ %2 = zext <2 x i32> %0 to <2 x i64>
+ %3 = mul <2 x i64> %1, %2
+ ret <2 x i64> %3
}
-declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index deed554d842c..e1fe64b02d9d 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -111,3 +111,21 @@ define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
ret <16 x i8> %tmp2
}
+
+; Undef shuffle indices should not prevent matching to VREV:
+
+define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
+;CHECK: test_vrev64D8_undef:
+;CHECK: vrev64.8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i8> %tmp2
+}
+
+define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
+;CHECK: test_vrev32Q16_undef:
+;CHECK: vrev32.16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
+ ret <8 x i16> %tmp2
+}
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 95414c308914..2b535ada3072 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst1i8:
;CHECK: vst1.8
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1)
+ call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 1)
ret void
}
@@ -13,7 +13,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -22,7 +22,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -31,7 +31,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
ret void
}
@@ -40,7 +40,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1)
ret void
}
@@ -48,7 +48,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst1Qi8:
;CHECK: vst1.8
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
+ call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 1)
ret void
}
@@ -57,7 +57,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -66,7 +66,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -75,7 +75,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst1.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
ret void
}
@@ -84,18 +84,18 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <2 x i64>* %B
- call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1)
+ call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind
-declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
-declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
-declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
-declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 3c98a2cbe60d..aed15fd51c56 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2i8:
;CHECK: vst2.8
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
ret void
}
@@ -13,7 +13,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -22,7 +22,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -31,7 +31,7 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -40,7 +40,7 @@ define void @vst2i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
ret void
}
@@ -48,7 +48,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst2Qi8:
;CHECK: vst2.8
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
ret void
}
@@ -57,7 +57,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -66,7 +66,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -75,17 +75,17 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind
-declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>) nounwind
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>) nounwind
-declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>) nounwind
-declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>) nounwind
-declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index 2599bc0db933..1feaed5a1044 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s
define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3i8:
;CHECK: vst3.8
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
ret void
}
@@ -13,7 +13,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -22,7 +22,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -31,7 +31,7 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -40,7 +40,7 @@ define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
ret void
}
@@ -49,7 +49,7 @@ define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst3.8
;CHECK: vst3.8
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
ret void
}
@@ -59,7 +59,7 @@ define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -69,7 +69,7 @@ define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -79,17 +79,17 @@ define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
-declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
-declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
-declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
-declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index 878f0efaa480..d302f097fc1f 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4i8:
;CHECK: vst4.8
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+ call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
ret void
}
@@ -13,7 +13,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+ call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
ret void
}
@@ -22,7 +22,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+ call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
ret void
}
@@ -31,7 +31,7 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+ call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
ret void
}
@@ -40,7 +40,7 @@ define void @vst4i64(i64* %A, <1 x i64>* %B) nounwind {
;CHECK: vst1.64
%tmp0 = bitcast i64* %A to i8*
%tmp1 = load <1 x i64>* %B
- call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1)
+ call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
ret void
}
@@ -49,7 +49,7 @@ define void @vst4Qi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst4.8
;CHECK: vst4.8
%tmp1 = load <16 x i8>* %B
- call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1)
+ call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 1)
ret void
}
@@ -59,7 +59,7 @@ define void @vst4Qi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1)
+ call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
ret void
}
@@ -69,7 +69,7 @@ define void @vst4Qi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1)
+ call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
ret void
}
@@ -79,17 +79,17 @@ define void @vst4Qf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1)
+ call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
-declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>) nounwind
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind
-declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind
-declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>) nounwind
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
-declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
+declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index cf50756d465e..30ec52ac6420 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -4,7 +4,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2lanei8:
;CHECK: vst2.8
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
ret void
}
@@ -13,7 +13,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst2.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
ret void
}
@@ -22,7 +22,7 @@ define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
ret void
}
@@ -31,7 +31,7 @@ define void @vst2lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -40,7 +40,7 @@ define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst2.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1)
ret void
}
@@ -49,7 +49,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
ret void
}
@@ -58,24 +58,24 @@ define void @vst2laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst2.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3)
+ call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind
define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst3lanei8:
;CHECK: vst3.8
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
ret void
}
@@ -84,7 +84,7 @@ define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
ret void
}
@@ -93,7 +93,7 @@ define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
ret void
}
@@ -102,7 +102,7 @@ define void @vst3lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -111,7 +111,7 @@ define void @vst3laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst3.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6)
+ call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 1)
ret void
}
@@ -120,7 +120,7 @@ define void @vst3laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0)
+ call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
ret void
}
@@ -129,25 +129,25 @@ define void @vst3laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst3.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst4lanei8:
;CHECK: vst4.8
%tmp1 = load <8 x i8>* %B
- call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
ret void
}
@@ -156,7 +156,7 @@ define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst4.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <4 x i16>* %B
- call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
ret void
}
@@ -165,7 +165,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <2 x i32>* %B
- call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
ret void
}
@@ -174,7 +174,7 @@ define void @vst4lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <2 x float>* %B
- call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
ret void
}
@@ -183,7 +183,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst4.16
%tmp0 = bitcast i16* %A to i8*
%tmp1 = load <8 x i16>* %B
- call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7)
+ call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 1)
ret void
}
@@ -192,7 +192,7 @@ define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast i32* %A to i8*
%tmp1 = load <4 x i32>* %B
- call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2)
+ call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
ret void
}
@@ -201,15 +201,15 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vst4.32
%tmp0 = bitcast float* %A to i8*
%tmp1 = load <4 x float>* %B
- call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
+ call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
-declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
+declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/vsub.ll b/test/CodeGen/ARM/vsub.ll
index 3416de76f123..df77bb31fc8b 100644
--- a/test/CodeGen/ARM/vsub.ll
+++ b/test/CodeGen/ARM/vsub.ll
@@ -157,8 +157,10 @@ define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vsubl.s8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = sub <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -166,8 +168,10 @@ define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vsubl.s16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = sub <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
}
define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -175,8 +179,10 @@ define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vsubl.s32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = sub <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
}
define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
@@ -184,8 +190,10 @@ define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vsubl.u8
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
+ %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp5 = sub <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
}
define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
@@ -193,8 +201,10 @@ define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
;CHECK: vsubl.u16
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
+ %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp5 = sub <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
}
define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
@@ -202,25 +212,20 @@ define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
;CHECK: vsubl.u32
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
+ %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp5 = sub <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
}
-declare <8 x i16> @llvm.arm.neon.vsubls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsubls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsubls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vsublu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsublu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsublu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone
-
define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK: vsubws8:
;CHECK: vsubw.s8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = sub <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
@@ -228,8 +233,9 @@ define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK: vsubw.s16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = sub <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
}
define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
@@ -237,8 +243,9 @@ define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK: vsubw.s32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = sub <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
}
define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
@@ -246,8 +253,9 @@ define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
;CHECK: vsubw.u8
%tmp1 = load <8 x i16>* %A
%tmp2 = load <8 x i8>* %B
- %tmp3 = call <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16> %tmp1, <8 x i8> %tmp2)
- ret <8 x i16> %tmp3
+ %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
+ %tmp4 = sub <8 x i16> %tmp1, %tmp3
+ ret <8 x i16> %tmp4
}
define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
@@ -255,8 +263,9 @@ define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
;CHECK: vsubw.u16
%tmp1 = load <4 x i32>* %A
%tmp2 = load <4 x i16>* %B
- %tmp3 = call <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32> %tmp1, <4 x i16> %tmp2)
- ret <4 x i32> %tmp3
+ %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
+ %tmp4 = sub <4 x i32> %tmp1, %tmp3
+ ret <4 x i32> %tmp4
}
define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
@@ -264,14 +273,7 @@ define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
;CHECK: vsubw.u32
%tmp1 = load <2 x i64>* %A
%tmp2 = load <2 x i32>* %B
- %tmp3 = call <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64> %tmp1, <2 x i32> %tmp2)
- ret <2 x i64> %tmp3
+ %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
+ %tmp4 = sub <2 x i64> %tmp1, %tmp3
+ ret <2 x i64> %tmp4
}
-
-declare <8 x i16> @llvm.arm.neon.vsubws.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsubws.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsubws.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
-
-declare <8 x i16> @llvm.arm.neon.vsubwu.v8i16(<8 x i16>, <8 x i8>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vsubwu.v4i32(<4 x i32>, <4 x i16>) nounwind readnone
-declare <2 x i64> @llvm.arm.neon.vsubwu.v2i64(<2 x i64>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 10bb10ac24a1..b1c2f93b47c6 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -95,3 +95,30 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
+
+; Undef shuffle indices should not prevent matching to VTRN:
+
+define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vtrni8_undef:
+;CHECK: vtrn.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vtrnQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vtrnQi16_undef:
+;CHECK: vtrn.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 6cef188d76dd..9130f628919a 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -73,3 +73,30 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
+
+; Undef shuffle indices should not prevent matching to VUZP:
+
+define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vuzpi8_undef:
+;CHECK: vuzp.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <8 x i16> @vuzpQi16_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK: vuzpQi16_undef:
+;CHECK: vuzp.16
+;CHECK-NEXT: vadd.i16
+ %tmp1 = load <8 x i16>* %A
+ %tmp2 = load <8 x i16>* %B
+ %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
+ %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index a9ecdcab42d7..926970aeb29b 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -73,3 +73,30 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
%tmp5 = fadd <4 x float> %tmp3, %tmp4
ret <4 x float> %tmp5
}
+
+; Undef shuffle indices should not prevent matching to VZIP:
+
+define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+;CHECK: vzipi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <8 x i8>* %A
+ %tmp2 = load <8 x i8>* %B
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
+ %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
+;CHECK: vzipQi8_undef:
+;CHECK: vzip.8
+;CHECK-NEXT: vadd.i8
+ %tmp1 = load <16 x i8>* %A
+ %tmp2 = load <16 x i8>* %B
+ %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+ %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
diff --git a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
new file mode 100644
index 000000000000..b838ec949eae
--- /dev/null
+++ b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=alpha | FileCheck %s
+
+define fastcc i64 @getcount(i64 %s) {
+ %tmp431 = mul i64 %s, 12884901888
+ ret i64 %tmp431
+}
+
+; CHECK: sll $16,33,$0
+; CHECK-NEXT: sll $16,32,$1
+; CHECK-NEXT: addq $0,$1,$0
+
diff --git a/test/CodeGen/CellSPU/arg_ret.ll b/test/CodeGen/CellSPU/arg_ret.ll
new file mode 100644
index 000000000000..743292a58d59
--- /dev/null
+++ b/test/CodeGen/CellSPU/arg_ret.ll
@@ -0,0 +1,33 @@
+; Test parameter passing and return values
+;RUN: llc --march=cellspu %s -o - | FileCheck %s
+
+; this fits into registers r3-r74
+%paramstruct = type { i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
+ i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
+ i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
+ i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
+ i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,
+ i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32,i32}
+define ccc i32 @test_regs( %paramstruct %prm )
+{
+;CHECK: lr $3, $74
+;CHECK: bi $lr
+ %1 = extractvalue %paramstruct %prm, 71
+ ret i32 %1
+}
+
+define ccc i32 @test_regs_and_stack( %paramstruct %prm, i32 %stackprm )
+{
+;CHECK-NOT: a $3, $74, $75
+ %1 = extractvalue %paramstruct %prm, 71
+ %2 = add i32 %1, %stackprm
+ ret i32 %2
+}
+
+define ccc %paramstruct @test_return( i32 %param, %paramstruct %prm )
+{
+;CHECK: lqd $75, 80($sp)
+;CHECK: lr $3, $4
+ ret %paramstruct %prm
+}
+
diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll
index 5483f463732b..63293e2aecb1 100644
--- a/test/CodeGen/CellSPU/bigstack.ll
+++ b/test/CodeGen/CellSPU/bigstack.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=cellspu -o %t1.s
-; RUN: grep lqx %t1.s | count 4
-; RUN: grep il %t1.s | grep -v file | count 7
-; RUN: grep stqx %t1.s | count 2
+; RUN: grep lqx %t1.s | count 3
+; RUN: grep il %t1.s | grep -v file | count 5
+; RUN: grep stqx %t1.s | count 1
define i32 @bigstack() nounwind {
entry:
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index eb7cf2c6467c..559b266e59df 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s
; RUN: grep brsl %t1.s | count 1
-; RUN: grep brasl %t1.s | count 1
-; RUN: grep stqd %t1.s | count 80
+; RUN: grep brasl %t1.s | count 2
+; RUN: grep stqd %t1.s | count 82
; RUN: llc < %s -march=cellspu | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
@@ -29,3 +29,25 @@ define i32 @stub_2(...) {
entry:
ret i32 0
}
+
+; check that struct is passed in r3->
+; assert this by changing the second field in the struct
+%0 = type { i32, i32, i32 }
+declare %0 @callee()
+define %0 @test_structret()
+{
+;CHECK: stqd $lr, 16($sp)
+;CHECK: stqd $sp, -48($sp)
+;CHECK: ai $sp, $sp, -48
+;CHECK: brasl $lr, callee
+ %rv = call %0 @callee()
+;CHECK: ai $4, $4, 1
+;CHECK: lqd $lr, 64($sp)
+;CHECK: ai $sp, $sp, 48
+;CHECK: bi $lr
+ %oldval = extractvalue %0 %rv, 1
+ %newval = add i32 %oldval,1
+ %newrv = insertvalue %0 %rv, i32 %newval, 1
+ ret %0 %newrv
+}
+
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
index d94d77c9f142..141361d5702b 100644
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -12,7 +12,7 @@
; RUN: grep rotqby %t2.s | count 5
; RUN: grep lqd %t2.s | count 13
; RUN: grep ilhu %t2.s | count 2
-; RUN: grep ai %t2.s | count 8
+; RUN: grep ai %t2.s | count 9
; RUN: grep dispatch_tab %t2.s | count 6
; ModuleID = 'call_indirect.bc'
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
index 04accb9c56b8..f37d2ae89b00 100644
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ b/test/CodeGen/CellSPU/shuffles.ll
@@ -16,3 +16,26 @@ define <4 x float> @splat(float %param1) {
ret <4 x float> %val
}
+define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
+ %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
+;CHECK: lqa $6,
+;CHECK: shufb $4, $4, $5, $6
+ %sl2_17 = insertelement <2 x float> %sl2_17_tmp1, float %val2, i32 1
+
+;CHECK: cdd $5, 0($3)
+;CHECK: lqd $6, 0($3)
+;CHECK: shufb $4, $4, $6, $5
+;CHECK: stqd $4, 0($3)
+;CHECK: bi $lr
+ store <2 x float> %sl2_17, <2 x float>* %ptr
+ ret void
+}
+
+define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
+;CHECK: cwd $5, 4($sp)
+;CHECK: shufb $3, $4, $3, $5
+;CHECK: bi $lr
+ %rv = insertelement <4 x float> %vparam, float %eltparam, i32 1
+ ret <4 x float> %rv
+}
+
diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll
new file mode 100644
index 000000000000..b81c0cdbb299
--- /dev/null
+++ b/test/CodeGen/CellSPU/v2f32.ll
@@ -0,0 +1,75 @@
+;RUN: llc --march=cellspu %s -o - | FileCheck %s
+%vec = type <2 x float>
+
+define %vec @test_ret(%vec %param)
+{
+;CHECK: bi $lr
+ ret %vec %param
+}
+
+define %vec @test_add(%vec %param)
+{
+;CHECK: fa {{\$.}}, $3, $3
+ %1 = fadd %vec %param, %param
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+define %vec @test_sub(%vec %param)
+{
+;CHECK: fs {{\$.}}, $3, $3
+ %1 = fsub %vec %param, %param
+
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+define %vec @test_mul(%vec %param)
+{
+;CHECK: fm {{\$.}}, $3, $3
+ %1 = fmul %vec %param, %param
+
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+define %vec @test_splat(float %param ) {
+;CHECK: lqa
+;CHECK: shufb
+ %sv = insertelement <1 x float> undef, float %param, i32 0
+ %rv = shufflevector <1 x float> %sv, <1 x float> undef, <2 x i32> zeroinitializer
+;CHECK: bi $lr
+ ret %vec %rv
+}
+
+define void @test_store(%vec %val, %vec* %ptr){
+
+;CHECK: stqd
+ store %vec undef, %vec* null
+
+;CHECK: stqd $3, 0(${{.}})
+;CHECK: bi $lr
+ store %vec %val, %vec* %ptr
+ ret void
+}
+
+define %vec @test_insert(){
+;CHECK: cwd
+;CHECK: shufb $3
+ %rv = insertelement %vec undef, float 0.0e+00, i32 undef
+;CHECK: bi $lr
+ ret %vec %rv
+}
+
+define void @test_unaligned_store() {
+;CHECK: cdd $3, 8($3)
+;CHECK: lqd
+;CHECK: shufb
+;CHECK: stqd
+ %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1]
+ %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; <float*> [#uses=1]
+ %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1]
+ store <2 x float> undef, <2 x float>* %vptr
+ ret void
+}
+
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
new file mode 100644
index 000000000000..dd51be5a71d2
--- /dev/null
+++ b/test/CodeGen/CellSPU/v2i32.ll
@@ -0,0 +1,64 @@
+;RUN: llc --march=cellspu %s -o - | FileCheck %s
+%vec = type <2 x i32>
+
+define %vec @test_ret(%vec %param)
+{
+;CHECK: bi $lr
+ ret %vec %param
+}
+
+define %vec @test_add(%vec %param)
+{
+;CHECK: a {{\$.}}, $3, $3
+ %1 = add %vec %param, %param
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+define %vec @test_sub(%vec %param)
+{
+;CHECK: sf {{\$.}}, $4, $3
+ %1 = sub %vec %param, <i32 1, i32 1>
+
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+define %vec @test_mul(%vec %param)
+{
+;CHECK: mpyu
+;CHECK: mpyh
+;CHECK: a {{\$., \$., \$.}}
+;CHECK: a {{\$., \$., \$.}}
+ %1 = mul %vec %param, %param
+
+;CHECK: bi $lr
+ ret %vec %1
+}
+
+define <2 x i32> @test_splat(i32 %param ) {
+;TODO insertelement transforms to a PREFSLOT2VEC, that trasforms to the
+; somewhat redundant:
+;CHECK-NOT or $3, $3, $3
+;CHECK: lqa
+;CHECK: shufb
+ %sv = insertelement <1 x i32> undef, i32 %param, i32 0
+ %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer
+;CHECK: bi $lr
+ ret <2 x i32> %rv
+}
+
+define i32 @test_extract() {
+;CHECK: shufb $3
+ %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; <i32> [#uses=1]
+;CHECK: bi $lr
+ ret i32 %rv
+}
+
+define void @test_store( %vec %val, %vec* %ptr)
+{
+;CHECK: stqd $3, 0(${{.}})
+;CHECK: bi $lr
+ store %vec %val, %vec* %ptr
+ ret void
+}
diff --git a/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll
new file mode 100644
index 000000000000..a2945aaec331
--- /dev/null
+++ b/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s
+
+define float @test1()
+{
+ ret float extractelement (<2 x float> bitcast (<1 x double> <double 0x3f800000> to <2 x float>), i32 1);
+}
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 8e7b70e2216f..9d8e391f874e 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -4,7 +4,7 @@
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define i64 @add64(i64 %u, i64 %v) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index b2aaa00754b7..b1d20d93f187 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
%struct.sret0 = type { i32, i32, i32 }
define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind {
diff --git a/test/CodeGen/Mips/2008-07-05-ByVal.ll b/test/CodeGen/Mips/2008-07-05-ByVal.ll
index 6bb6bd862b25..a1f05044b6c6 100644
--- a/test/CodeGen/Mips/2008-07-05-ByVal.ll
+++ b/test/CodeGen/Mips/2008-07-05-ByVal.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep {lw.*(\$4)} | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
%struct.byval0 = type { i32, i32 }
define i64 @test0(%struct.byval0* byval %b, i64 %sum) nounwind {
diff --git a/test/CodeGen/Mips/2008-07-06-fadd64.ll b/test/CodeGen/Mips/2008-07-06-fadd64.ll
index 808ce16910ee..ecd8521027af 100644
--- a/test/CodeGen/Mips/2008-07-06-fadd64.ll
+++ b/test/CodeGen/Mips/2008-07-06-fadd64.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep __adddf3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define double @dofloat(double %a, double %b) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-07-FPExtend.ll b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
index 7ac0f5f840db..681788e98196 100644
--- a/test/CodeGen/Mips/2008-07-07-FPExtend.ll
+++ b/test/CodeGen/Mips/2008-07-07-FPExtend.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep __extendsfdf2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define double @dofloat(float %a) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index ca996367733e..d804c7dcf317 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define i32 @fptoint(float %a) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
index 20de18a0164c..b8b4c5c610de 100644
--- a/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
+++ b/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
@@ -5,7 +5,7 @@
; RUN: grep __fixunsdfsi %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define double @int2fp(i32 %a) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
index f6b2045444a5..bda4a3172f30 100644
--- a/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
+++ b/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
@@ -6,7 +6,7 @@
; RUN: not grep {gp_rel} %t
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
@.str = internal constant [10 x i8] c"AAAAAAAAA\00"
@i0 = internal constant [5 x i32] [ i32 0, i32 1, i32 2, i32 3, i32 4 ]
diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
index 26eb4db26d4d..91efd68622a2 100644
--- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll
+++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -10,7 +10,7 @@
; RUN: grep {\%lo} %t1 | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
%struct.anon = type { i32, i32 }
@s0 = global [8 x i8] c"AAAAAAA\00", align 4
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index 59599b399c29..41ae5dd65f51 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -3,7 +3,7 @@
; RUN: grep seb %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define i8 @A(i8 %e.0, i8 signext %sum) signext nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 21ff96005421..20bd88889061 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -2,7 +2,7 @@
; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define float @F(float %a) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
index 80101fa25b3e..ca837ffd2a50 100644
--- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll
+++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll
@@ -3,7 +3,7 @@
; RUN: grep {bc1\[tf\]} %t | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define float @A(float %a, float %b) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll
index 042cad60e2b0..52a4b081ddb3 100644
--- a/test/CodeGen/Mips/2008-07-29-icmp.ll
+++ b/test/CodeGen/Mips/2008-07-29-icmp.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define float @A(float %a, float %b, i32 %j) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-07-31-fcopysign.ll b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
index 77680bccf976..47382f989ca4 100644
--- a/test/CodeGen/Mips/2008-07-31-fcopysign.ll
+++ b/test/CodeGen/Mips/2008-07-31-fcopysign.ll
@@ -3,7 +3,7 @@
; RUN: grep neg.s %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define float @A(float %i, float %j) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index cd35ccaee83d..23ed64a96d8e 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -4,7 +4,7 @@
; RUN: grep multu %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
%struct.DWstruct = type { i32, i32 }
define i32 @A0(i32 %u, i32 %v) nounwind {
diff --git a/test/CodeGen/Mips/2008-08-03-fabs64.ll b/test/CodeGen/Mips/2008-08-03-fabs64.ll
index 2f33e9bea73f..0fc45f7d1b05 100644
--- a/test/CodeGen/Mips/2008-08-03-fabs64.ll
+++ b/test/CodeGen/Mips/2008-08-03-fabs64.ll
@@ -3,7 +3,7 @@
; RUN: grep {ori.*65535} %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define double @A(double %c, double %d) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index ca90b500f050..f8eb02855979 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -3,7 +3,7 @@
; RUN: grep mfc1 %t | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define float @A(i32 %u) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 79e49a3d682e..7be7974e0ffe 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define i32 @twoalloca(i32 %size) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-08-07-CC.ll b/test/CodeGen/Mips/2008-08-07-CC.ll
index 54d454cc3ade..63c25951423a 100644
--- a/test/CodeGen/Mips/2008-08-07-CC.ll
+++ b/test/CodeGen/Mips/2008-08-07-CC.ll
@@ -3,7 +3,7 @@
target datalayout =
"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define internal fastcc i32 @A(i32 %u) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-08-07-FPRound.ll b/test/CodeGen/Mips/2008-08-07-FPRound.ll
index f3bb965cdb69..67f86d741141 100644
--- a/test/CodeGen/Mips/2008-08-07-FPRound.ll
+++ b/test/CodeGen/Mips/2008-08-07-FPRound.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep __truncdfsf2 | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define float @round2float(double %a) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index 1da1db24bf5a..fb3332329d6c 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=mips | grep clz | count 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-psp-elf"
+target triple = "mipsallegrexel-unknown-psp-elf"
define i32 @A0(i32 %u) nounwind {
entry:
diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll
new file mode 100644
index 000000000000..8b7f9a919378
--- /dev/null
+++ b/test/CodeGen/Mips/2010-07-20-Select.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s
+; Fix PR7473
+
+define i32 @main() nounwind readnone {
+entry:
+ %a = alloca i32, align 4 ; <i32*> [#uses=2]
+ %c = alloca i32, align 4 ; <i32*> [#uses=2]
+ volatile store i32 1, i32* %a, align 4
+ volatile store i32 0, i32* %c, align 4
+ %0 = volatile load i32* %a, align 4 ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+; CHECK: addiu $4, $zero, 3
+ %iftmp.0.0 = select i1 %1, i32 3, i32 0 ; <i32> [#uses=1]
+ %2 = volatile load i32* %c, align 4 ; <i32> [#uses=1]
+ %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
+; CHECK: addu $4, $zero, $3
+; CHECK: addu $2, $5, $4
+ %iftmp.2.0 = select i1 %3, i32 0, i32 5 ; <i32> [#uses=1]
+ %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0 ; <i32> [#uses=1]
+ ret i32 %4
+}
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
new file mode 100644
index 000000000000..07fc10cae180
--- /dev/null
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s
+
+define i32 @main() nounwind readnone {
+entry:
+ %x = alloca i32, align 4 ; <i32*> [#uses=2]
+ volatile store i32 2, i32* %x, align 4
+ %0 = volatile load i32* %x, align 4 ; <i32> [#uses=1]
+; CHECK: lui $3, %hi($JTI0_0)
+; CHECK: sll $2, $2, 2
+; CHECK: addiu $3, $3, %lo($JTI0_0)
+ switch i32 %0, label %bb4 [
+ i32 0, label %bb5
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ ]
+
+bb1: ; preds = %entry
+ ret i32 2
+
+; CHECK: $BB0_2
+bb2: ; preds = %entry
+ ret i32 0
+
+bb3: ; preds = %entry
+ ret i32 3
+
+bb4: ; preds = %entry
+ ret i32 4
+
+bb5: ; preds = %entry
+ ret i32 1
+}
diff --git a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll b/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
deleted file mode 100644
index db2ab877ff7d..000000000000
--- a/test/CodeGen/PowerPC/2008-01-25-EmptyFunction.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=ppc32 | grep nop
-target triple = "powerpc-apple-darwin8"
-
-
-define void @bork() noreturn nounwind {
-entry:
- unreachable
-}
diff --git a/test/CodeGen/PowerPC/empty-functions.ll b/test/CodeGen/PowerPC/empty-functions.ll
new file mode 100644
index 000000000000..3a2907d5d7b9
--- /dev/null
+++ b/test/CodeGen/PowerPC/empty-functions.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+define void @func() {
+entry:
+ unreachable
+}
+; CHECK-NO-FP: _func:
+; CHECK-NO-FP: nop
+
+; CHECK-FP: _func:
+; CHECK-FP: nop
diff --git a/test/CodeGen/PowerPC/vec_constants.ll b/test/CodeGen/PowerPC/vec_constants.ll
index 32c6f4809cb4..399f19f8d2e2 100644
--- a/test/CodeGen/PowerPC/vec_constants.ll
+++ b/test/CodeGen/PowerPC/vec_constants.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep CPI
-define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
+define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
%tmp = load <4 x i32>* %P1 ; <<4 x i32>> [#uses=1]
%tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp4, <4 x i32>* %P1
@@ -15,26 +15,30 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
ret void
}
-define <4 x i32> @test_30() {
+define <4 x i32> @test_30() nounwind {
ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 >
}
-define <4 x i32> @test_29() {
+define <4 x i32> @test_29() nounwind {
ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 >
}
-define <8 x i16> @test_n30() {
+define <8 x i16> @test_n30() nounwind {
ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 >
}
-define <16 x i8> @test_n104() {
+define <16 x i8> @test_n104() nounwind {
ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 >
}
-define <4 x i32> @test_vsldoi() {
+define <4 x i32> @test_vsldoi() nounwind {
ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 >
}
-define <4 x i32> @test_rol() {
+define <8 x i16> @test_vsldoi_65023() nounwind {
+ ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 >
+}
+
+define <4 x i32> @test_rol() nounwind {
ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 >
}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
index cf12063e5d4c..eabeb0a42254 100644
--- a/test/CodeGen/SystemZ/05-MemLoadsStores.ll
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
@@ -4,7 +4,7 @@
; RUN: llc < %s | grep {st %} | count 2
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind {
entry:
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
index 1e6232a62550..53bb641cf1eb 100644
--- a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
+++ b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
@@ -4,7 +4,7 @@
; RUN: llc < %s | grep {sth.%} | count 2
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind {
entry:
diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll
index e0bc302c7314..ac6067abbee0 100644
--- a/test/CodeGen/SystemZ/07-BrUnCond.ll
+++ b/test/CodeGen/SystemZ/07-BrUnCond.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s
target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define void @foo() noreturn nounwind {
entry:
diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
index 27189ab41567..30810ce6eb90 100644
--- a/test/CodeGen/SystemZ/09-DynamicAlloca.ll
+++ b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define void @foo(i64 %N) nounwind {
entry:
diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll
index 6e0c1ab2c165..50a26e2a451a 100644
--- a/test/CodeGen/SystemZ/09-Globals.ll
+++ b/test/CodeGen/SystemZ/09-Globals.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s | grep larl | count 3
target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
@bar = common global i64 0, align 8 ; <i64*> [#uses=3]
define i64 @foo() nounwind readonly {
diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll
index cc325389d787..f291e5ff42b6 100644
--- a/test/CodeGen/SystemZ/10-FuncsPic.ll
+++ b/test/CodeGen/SystemZ/10-FuncsPic.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -relocation-model=pic | grep PLT | count 1
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
@ptr = external global void (...)* ; <void (...)**> [#uses=2]
define void @foo1() nounwind {
diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll
index a77671e2ba7b..c581ad9c4578 100644
--- a/test/CodeGen/SystemZ/10-GlobalsPic.ll
+++ b/test/CodeGen/SystemZ/10-GlobalsPic.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
@src = external global i32 ; <i32*> [#uses=2]
@dst = external global i32 ; <i32*> [#uses=2]
@ptr = external global i32* ; <i32**> [#uses=2]
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
index 609d9dcf59c5..b170a8044a9d 100644
--- a/test/CodeGen/SystemZ/11-BSwap.ll
+++ b/test/CodeGen/SystemZ/11-BSwap.ll
@@ -2,7 +2,7 @@
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define i16 @foo(i16 zeroext %a) zeroext {
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
index 07a164d42645..54424e18f68b 100644
--- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
+++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=systemz | grep rll
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
entry:
diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
index 99d0ee7b03d9..89b22251eb23 100644
--- a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
+++ b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind {
entry:
diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
index a35167fba04f..68ccb848980c 100644
--- a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
+++ b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind {
entry:
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
index b37f7e92d5fb..98feb83231dc 100644
--- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
+++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
index 5457b12afcba..f4e176eb4421 100644
--- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
+++ b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define float @foo(i32 signext %a) {
entry:
diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
index a91e29ea4f9d..63fd8553b32e 100644
--- a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
+++ b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s
target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-linux"
+target triple = "s390x-ibm-linux"
define signext i32 @dfg_parse() nounwind {
entry:
diff --git a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 2074bfd5d7b9..929c472d1ef6 100644
--- a/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
+++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s | not grep r11
-target triple = "thumb-linux-gnueabi"
+target triple = "thumb-unknown-linux-gnueabi"
%struct.__sched_param = type { i32 }
%struct.pthread_attr_t = type { i32, i32, %struct.__sched_param, i32, i32, i32, i32, i8*, i32 }
@i.1882 = internal global i32 1 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
new file mode 100644
index 000000000000..9a6321bb43c4
--- /dev/null
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -0,0 +1,147 @@
+; RUN: llc -mtriple=thumbv6-apple-darwin10 < %s | FileCheck %s
+; RUN: opt -strip-debug < %s | llc -mtriple=thumbv6-apple-darwin10 | FileCheck %s
+; Stripping out debug info formerly caused the last two multiplies to be emitted in
+; the other order. 7797940 (part of it dated 6/29/2010..7/15/2010).
+
+%0 = type { [3 x double] }
+
+@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%0*, i32, i32)* @_Z19getClosestDiagonal3ii to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
+; CHECK: blx ___muldf3
+; CHECK: blx ___muldf3
+; CHECK: beq LBB0_8
+; CHECK: blx ___muldf3
+; <label>:3
+ switch i32 %1, label %4 [
+ i32 0, label %5
+ i32 3, label %5
+ ]
+
+; <label>:4 ; preds = %3
+ br label %5, !dbg !0
+
+; <label>:5 ; preds = %4, %3, %3
+ %storemerge = phi double [ -1.000000e+00, %4 ], [ 1.000000e+00, %3 ], [ 1.000000e+00, %3 ] ; <double> [#uses=1]
+ %v_6 = icmp slt i32 %1, 2 ; <i1> [#uses=1]
+ %storemerge1 = select i1 %v_6, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3]
+ call void @llvm.dbg.value(metadata !{double %storemerge}, i64 0, metadata !91), !dbg !0
+ %v_7 = icmp eq i32 %2, 1, !dbg !92 ; <i1> [#uses=1]
+ %storemerge2 = select i1 %v_7, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3]
+ %v_8 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
+ %v_10 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 2 ; <double*> [#uses=1]
+ %v_11 = fmul double %storemerge1, %storemerge1, !dbg !93 ; <double> [#uses=1]
+ %v_15 = tail call double @sqrt(double %v_11) nounwind readonly, !dbg !93 ; <double> [#uses=1]
+ %v_16 = fdiv double 1.000000e+00, %v_15, !dbg !93 ; <double> [#uses=3]
+ %v_17 = fmul double %storemerge, %v_16, !dbg !97 ; <double> [#uses=1]
+ store double %v_17, double* %v_8, align 4, !dbg !97
+ %v_19 = fmul double %storemerge2, %v_16, !dbg !97 ; <double> [#uses=1]
+ store double %v_19, double* %v_10, align 4, !dbg !97
+ ret void, !dbg !98
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare double @sqrt(double) nounwind readonly
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 46, i32 0, metadata !1, null}
+!1 = metadata !{i32 524299, metadata !2, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 524299, metadata !3, i32 44, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", metadata !4, i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 524329, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 524305, i32 0, i32 4, metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{metadata !8, metadata !22, metadata !22}
+!8 = metadata !{i32 524307, metadata !4, metadata !"ggVector3", metadata !9, i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ]
+!10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, metadata !84, metadata !85, metadata !88, metadata !89, metadata !90}
+!11 = metadata !{i32 524301, metadata !8, metadata !"e", metadata !9, i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
+!12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ]
+!13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 524321, i64 0, i64 2} ; [ DW_TAG_subrange_type ]
+!16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!18 = metadata !{null, metadata !19, metadata !20}
+!19 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
+!20 = metadata !{i32 524310, metadata !21, metadata !"ggBoolean", metadata !21, i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
+!21 = metadata !{i32 524329, metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", metadata !5} ; [ DW_TAG_file_type ]
+!22 = metadata !{i32 524324, metadata !4, metadata !"int", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!25 = metadata !{null, metadata !19}
+!26 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!27 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13}
+!29 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", metadata !9, i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!30 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", metadata !9, i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!31 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!32 = metadata !{metadata !13, metadata !33}
+!33 = metadata !{i32 524303, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 524326, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
+!35 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", metadata !9, i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!36 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", metadata !9, i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 524334, i32 0, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", metadata !9, i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!38 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!39 = metadata !{metadata !40, metadata !19}
+!40 = metadata !{i32 524304, metadata !4, metadata !"double", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ]
+!41 = metadata !{i32 524334, i32 0, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", metadata !9, i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!42 = metadata !{i32 524334, i32 0, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", metadata !9, i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!43 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", metadata !9, i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!44 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!45 = metadata !{null, metadata !19, metadata !13}
+!46 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", metadata !9, i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!47 = metadata !{i32 524334, i32 0, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", metadata !9, i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!48 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!49 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!50 = metadata !{null, metadata !19, metadata !51}
+!51 = metadata !{i32 524304, metadata !4, metadata !"", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ]
+!52 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", metadata !9, i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!53 = metadata !{i32 524334, i32 0, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", metadata !9, i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!54 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", metadata !9, i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!55 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!56 = metadata !{metadata !51, metadata !33}
+!57 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", metadata !9, i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!58 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!59 = metadata !{metadata !8, metadata !33}
+!60 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", metadata !9, i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!61 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!62 = metadata !{metadata !13, metadata !33, metadata !22}
+!63 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", metadata !9, i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!64 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!65 = metadata !{metadata !40, metadata !19, metadata !22}
+!66 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", metadata !9, i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!67 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!68 = metadata !{metadata !69, metadata !19, metadata !51}
+!69 = metadata !{i32 524304, metadata !4, metadata !"ggVector3", metadata !4, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ]
+!70 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", metadata !9, i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!71 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", metadata !9, i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!72 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!73 = metadata !{metadata !69, metadata !19, metadata !13}
+!74 = metadata !{i32 524334, i32 0, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", metadata !9, i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!75 = metadata !{i32 524334, i32 0, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", metadata !9, i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!76 = metadata !{i32 524334, i32 0, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", metadata !9, i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!77 = metadata !{i32 524334, i32 0, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", metadata !9, i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!78 = metadata !{i32 524334, i32 0, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", metadata !9, i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!79 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13}
+!81 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", metadata !9, i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!82 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", metadata !9, i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!83 = metadata !{i32 524334, i32 0, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", metadata !9, i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!84 = metadata !{i32 524334, i32 0, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", metadata !9, i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!85 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", metadata !9, i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!86 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!87 = metadata !{metadata !22, metadata !33}
+!88 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", metadata !9, i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!89 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", metadata !9, i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!90 = metadata !{i32 524334, i32 0, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", metadata !9, i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ]
+!92 = metadata !{i32 48, i32 0, metadata !1, null}
+!93 = metadata !{i32 218, i32 0, metadata !94, metadata !96}
+!94 = metadata !{i32 524299, metadata !95, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
+!95 = metadata !{i32 524299, metadata !77, i32 217, i32 0} ; [ DW_TAG_lexical_block ]
+!96 = metadata !{i32 51, i32 0, metadata !1, null}
+!97 = metadata !{i32 227, i32 0, metadata !94, metadata !96}
+!98 = metadata !{i32 52, i32 0, metadata !1, null}
diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll
new file mode 100644
index 000000000000..c611b865f67d
--- /dev/null
+++ b/test/CodeGen/Thumb/barrier.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6
+; RUN: llc < %s -march=thumb -mattr=+v6m | FileCheck %s -check-prefix=V6M
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+
+define void @t1() {
+; V6: t1:
+; V6: blx {{_*}}sync_synchronize
+
+; V6M: t1:
+; V6M: dsb
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+ ret void
+}
+
+define void @t2() {
+; V6: t2:
+; V6: blx {{_*}}sync_synchronize
+
+; V6M: t2:
+; V6M: dmb
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+ ret void
+}
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index acfdc917ddf0..5c8ad974bc0e 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=thumb | not grep {ldr sp}
; RUN: llc < %s -mtriple=thumb-apple-darwin | \
; RUN: not grep {sub.*r7}
-; RUN: llc < %s -march=thumb | grep 4294967280
+; RUN: llc < %s -march=thumb | grep {mov.*r6, sp}
%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index 02de36af1cc7..b289484f5efb 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,20 +1,35 @@
-; RUN: llc < %s -march=thumb | grep {ldr.*LCP} | count 5
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
define void @test1() {
+; CHECK: test1:
+; CHECK: sub sp, #256
+; CHECK: add sp, #256
%tmp = alloca [ 64 x i32 ] , align 4
ret void
}
define void @test2() {
+; CHECK: test2:
+; CHECK: ldr r0, LCPI
+; CHECK: add sp, r0
+; CHECK: mov sp, r7
+; CHECK: sub sp, #4
%tmp = alloca [ 4168 x i8 ] , align 4
ret void
}
define i32 @test3() {
- %retval = alloca i32, align 4
- %tmp = alloca i32, align 4
- %a = alloca [805306369 x i8], align 16
- store i32 0, i32* %tmp
- %tmp1 = load i32* %tmp
- ret i32 %tmp1
+; CHECK: test3:
+; CHECK: ldr r2, LCPI
+; CHECK: add sp, r2
+; CHECK: ldr r1, LCPI
+; CHECK: add r1, sp
+; CHECK: mov sp, r7
+; CHECK: sub sp, #4
+ %retval = alloca i32, align 4
+ %tmp = alloca i32, align 4
+ %a = alloca [805306369 x i8], align 16
+ store i32 0, i32* %tmp
+ %tmp1 = load i32* %tmp
+ ret i32 %tmp1
}
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index 16a9c4442d8a..c2ba208e4ae2 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=thumb
-; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 1
+; RUN: llc < %s -mtriple=thumb-linux | grep pop | count 2
; RUN: llc < %s -mtriple=thumb-darwin | grep pop | count 2
@str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 98a5263c2f99..45d356c3dc67 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -11,8 +11,8 @@
define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) {
; CHECK: _ZNKSs7compareERKSs:
; CHECK: it eq
-; CHECK-NEXT: subeq.w r0, r6, r8
-; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r8, r9, pc}
+; CHECK-NEXT: subeq r0, r6, r7
+; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
entry:
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
%1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
index 3f1b9eb8d9d0..2246de35e03c 100644
--- a/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
+++ b/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll
@@ -7,17 +7,12 @@
define void @t() nounwind ssp {
entry:
; CHECK: t:
-; CHECK: mov r0, sp
-; CHECK: bfc r0, #0, #3
-; CHECK: subs r0, #16
-; CHECK: mov sp, r0
-; Yes, this is stupid codegen, but it's correct.
-; CHECK: mov r0, sp
-; CHECK: bfc r0, #0, #3
-; CHECK: subs r0, #16
-; CHECK: mov sp, r0
%size = mul i32 8, 2
+; CHECK: subs r0, #16
+; CHECK: mov sp, r0
%vla_a = alloca i8, i32 %size, align 8
+; CHECK: subs r0, #16
+; CHECK: mov sp, r0
%vla_b = alloca i8, i32 %size, align 8
unreachable
}
diff --git a/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
new file mode 100644
index 000000000000..abcf13a3e38f
--- /dev/null
+++ b/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -O3 | FileCheck %s
+
+@.str = private constant [4 x i8] c"%d\0A\00", align 4 ; <[4 x i8]*> [#uses=1]
+
+define internal fastcc i32 @Callee(i32 %i) nounwind {
+entry:
+; CHECK: Callee:
+ %0 = icmp eq i32 %i, 0 ; <i1> [#uses=1]
+ br i1 %0, label %bb2, label %bb
+
+bb: ; preds = %entry
+ %1 = alloca [1000 x i8], align 4 ; <[1000 x i8]*> [#uses=1]
+ %.sub = getelementptr inbounds [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
+ %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
+ %3 = load i8* %.sub, align 4 ; <i8> [#uses=1]
+ %4 = sext i8 %3 to i32 ; <i32> [#uses=1]
+ ret i32 %4
+
+bb2: ; preds = %entry
+; Must restore sp from fp here
+; CHECK: mov sp, r7
+; CHECK: sub sp, #8
+; CHECK: pop
+ ret i32 0
+}
+
+declare i32 @__sprintf_chk(i8*, i32, i32, i8*, ...) nounwind
+
+define i32 @main() nounwind {
+; CHECK: main:
+bb.nph:
+ br label %bb
+
+bb: ; preds = %bb, %bb.nph
+ %0 = phi i32 [ 0, %bb.nph ], [ %3, %bb ] ; <i32> [#uses=2]
+ %j.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb ] ; <i32> [#uses=1]
+ %1 = tail call fastcc i32 @Callee(i32 %0) nounwind ; <i32> [#uses=1]
+ %2 = add nsw i32 %1, %j.01 ; <i32> [#uses=2]
+ %3 = add nsw i32 %0, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %3, 10000 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb2, label %bb
+
+bb2: ; preds = %bb
+; No need to restore sp from fp here.
+; CHECK: printf
+; CHECK-NOT: mov sp, r7
+; CHECK-NOT: sub sp, #12
+; CHECK: pop
+ %4 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %2) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
new file mode 100644
index 000000000000..22473bb35a0a
--- /dev/null
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s
+
+%struct.F = type { [3 x i8], i8 }
+
+@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1]
+
+define void @f1([1 x i32] %f.coerce0) nounwind {
+entry:
+; CHECK: f1
+; CHECK: movs r2, #10
+; CHECK: bfi r1, r2, #22, #4
+ %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+ %1 = and i32 %0, -62914561 ; <i32> [#uses=1]
+ %2 = or i32 %1, 41943040 ; <i32> [#uses=1]
+ store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
+ ret void
+}
+
+define i32 @f2(i32 %A, i32 %B) nounwind readnone optsize {
+entry:
+; CHECK: f2
+; CHECK: lsrs r1, r1, #7
+; CHECK: bfi r0, r1, #7, #16
+ %and = and i32 %A, -8388481 ; <i32> [#uses=1]
+ %and2 = and i32 %B, 8388480 ; <i32> [#uses=1]
+ %or = or i32 %and2, %and ; <i32> [#uses=1]
+ ret i32 %or
+}
+
+define i32 @f3(i32 %A, i32 %B) nounwind readnone optsize {
+entry:
+; CHECK: f3
+; CHECK: lsrs r2, r0, #7
+; CHECK: mov r0, r1
+; CHECK: bfi r0, r2, #7, #16
+ %and = and i32 %A, 8388480 ; <i32> [#uses=1]
+ %and2 = and i32 %B, -8388481 ; <i32> [#uses=1]
+ %or = or i32 %and2, %and ; <i32> [#uses=1]
+ ret i32 %or
+}
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
new file mode 100644
index 000000000000..f7ec5a3b577c
--- /dev/null
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CORTEXM3
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CORTEXM4
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8
+
+
+define float @foo(float %a, float %b) {
+entry:
+; CHECK: foo
+; CORTEXM3: blx ___mulsf3
+; CORTEXM4: vmul.f32 s0, s1, s0
+; CORTEXA8: vmul.f32 d0, d1, d0
+ %0 = fmul float %a, %b
+ ret float %0
+}
+
+define double @bar(double %a, double %b) {
+entry:
+; CHECK: bar
+ %0 = fmul double %a, %b
+; CORTEXM3: blx ___muldf3
+; CORTEXM4: blx ___muldf3
+; CORTEXA8: vmul.f64 d0, d1, d0
+ ret double %0
+}
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index 87af9d10572b..d8b51ec82ded 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -14,11 +14,11 @@ entry:
%6 = bitcast i32* %sp3 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
%7 = load <4 x i32>* %6, align 16 ; <<4 x i32>> [#uses=1]
%8 = bitcast i32* %dp to i8* ; <i8*> [#uses=1]
- tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7)
+ tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1)
ret void
}
-declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
@sbuf = common global [16 x i32] zeroinitializer, align 16 ; <[16 x i32]*> [#uses=5]
@dbuf = common global [16 x i32] zeroinitializer ; <[16 x i32]*> [#uses=2]
@@ -44,6 +44,6 @@ bb2: ; preds = %bb
%3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
%4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
%5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
- tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5) nounwind
+ tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
ret i32 0
}
diff --git a/test/CodeGen/Thumb2/div.ll b/test/CodeGen/Thumb2/div.ll
index 0cddd489fb46..e63a115273ff 100644
--- a/test/CodeGen/Thumb2/div.ll
+++ b/test/CodeGen/Thumb2/div.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMB
-; RUN: llc < %s -march=arm -mcpu=cortex-m3 -mattr=+thumb2 \
+; RUN: llc < %s -march=thumb -mcpu=cortex-m3 -mattr=+thumb2 \
; RUN: | FileCheck %s -check-prefix=CHECK-THUMBV7M
define i32 @f1(i32 %a, i32 %b) {
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 29b8e75cb8b3..650d788cb4d2 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -22,7 +22,7 @@
define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
entry:
-; CHECK: ldr.w r9, [r7, #28]
+; CHECK: ldr.w {{(r[0-9])|(lr)}}, [r7, #28]
%xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
%ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
br label %bb20
@@ -46,9 +46,9 @@ bb119: ; preds = %bb20, %bb20
bb420: ; preds = %bb20, %bb20
; CHECK: bb420
-; CHECK: str r{{[0-7]}}, [sp]
-; CHECK: str r{{[0-7]}}, [sp, #4]
-; CHECK: str r{{[0-7]}}, [sp, #8]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8]
; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24]
store %union.rec* null, %union.rec** @zz_hold, align 4
store %union.rec* null, %union.rec** @zz_res, align 4
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 7fa782f91de9..ad957a1fcb45 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -21,8 +21,8 @@ entry:
bb: ; preds = %bb, %entry
; CHECK: LBB0_1:
; CHECK: cmp r2, #0
-; CHECK: sub.w r9, r2, #1
-; CHECK: mov r2, r9
+; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1
+; CHECK: mov r2, [[REGISTER]]
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/machine-licm-vdup.ll b/test/CodeGen/Thumb2/machine-licm-vdup.ll
new file mode 100644
index 000000000000..fde2ee0ab0c9
--- /dev/null
+++ b/test/CodeGen/Thumb2/machine-licm-vdup.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-fp-elim -arm-vdup-splat | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -arm-vdup-splat | FileCheck %s
+; Modified version of machine-licm.ll with -arm-vdup-splat turned on, 8003375.
+; Eventually this should become the default and be moved into machine-licm.ll.
+; FIXME: the vdup should be hoisted out of the loop, 8248029.
+
+define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: mov.w r3, #1065353216
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+; CHECK-NEXT: %bb1
+; CHECK: vdup.32 q1, r3
+ %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
+ %tmp1 = shl i32 %indvar, 2
+ %gep1 = getelementptr i8* %ptr1, i32 %tmp1
+ %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
+ %tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
+ %gep2 = getelementptr i8* %ptr2, i32 %tmp1
+ call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
+ %indvar.next = add i32 %indvar, 1
+ %cond = icmp eq i32 %indvar.next, 10
+ br i1 %cond, label %bb2, label %bb1
+
+bb2:
+ ret void
+}
+
+; CHECK-NOT: LCPI1_0:
+; CHECK: .subsections_via_symbols
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index cdb3041b3bea..b949b2f30506 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -64,10 +64,10 @@ bb1:
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
%tmp1 = shl i32 %indvar, 2
%gep1 = getelementptr i8* %ptr1, i32 %tmp1
- %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1)
+ %tmp2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %gep1, i32 1)
%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, <4 x float> %tmp2)
%gep2 = getelementptr i8* %ptr2, i32 %tmp1
- call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3)
+ call void @llvm.arm.neon.vst1.v4f32(i8* %gep2, <4 x float> %tmp3, i32 1)
%indvar.next = add i32 %indvar, 1
%cond = icmp eq i32 %indvar.next, 10
br i1 %cond, label %bb2, label %bb1
@@ -79,8 +79,8 @@ bb2:
; CHECK: LCPI1_0:
; CHECK: .section
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
index 76c56d00473d..7b0432de9bb5 100644
--- a/test/CodeGen/Thumb2/thumb2-and2.ll
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -30,7 +30,7 @@ define i32 @f4(i32 %a) {
ret i32 %tmp
}
; CHECK: f4:
-; CHECK: and r0, r0, #1448498774
+; CHECK: bic r0, r0, #-1448498775
; 66846720 = 0x03fc0000
define i32 @f5(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-badreg-operands.ll b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
new file mode 100644
index 000000000000..4df06b836fc5
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-badreg-operands.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+
+define void @b(i32 %x) nounwind optsize {
+entry:
+; CHECK: b
+; CHECK: mov r2, sp
+; CHECK: mls r0, r0, r1, r2
+; CHECK: mov sp, r0
+ %0 = mul i32 %x, 24 ; <i32> [#uses=1]
+ %vla = alloca i8, i32 %0, align 1 ; <i8*> [#uses=1]
+ call arm_aapcscc void @a(i8* %vla) nounwind optsize
+ ret void
+}
+
+declare void @a(i8*) optsize
diff --git a/test/CodeGen/Thumb2/thumb2-barrier.ll b/test/CodeGen/Thumb2/thumb2-barrier.ll
new file mode 100644
index 000000000000..a54d09e62919
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-barrier.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1 )
+
+define void @t1() {
+; CHECK: t1:
+; CHECK: dsb
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 true )
+ ret void
+}
+
+define void @t2() {
+; CHECK: t2:
+; CHECK: dmb
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 true, i1 false )
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/thumb2-call-tc.ll b/test/CodeGen/Thumb2/thumb2-call-tc.ll
index 24502b0338c2..2e4da1b289b5 100644
--- a/test/CodeGen/Thumb2/thumb2-call-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s -check-prefix=DARWIN
; RUN: llc < %s -mtriple=thumbv7-linux -mattr=+thumb2 | FileCheck %s -check-prefix=LINUX
+; XFAIL: *
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
index d4773bb5809b..63249f4cf145 100644
--- a/test/CodeGen/Thumb2/thumb2-cmp.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -39,3 +39,17 @@ define i1 @f5(i32 %a) {
%tmp = icmp eq i32 %a, 1114112
ret i1 %tmp
}
+
+; Check that we don't do an invalid (a > b) --> !(a < b + 1) transform.
+;
+; CHECK: f6:
+; CHECK-NOT: cmp.w r0, #-2147483648
+; CHECK: bx lr
+define i32 @f6(i32 %a) {
+ %tmp = icmp sgt i32 %a, 2147483647
+ br i1 %tmp, label %true, label %false
+true:
+ ret i32 2
+false:
+ ret i32 0
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
index c02441547718..5315535db045 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; XFAIL: *
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK: t1:
diff --git a/test/CodeGen/Thumb2/thumb2-pack.ll b/test/CodeGen/Thumb2/thumb2-pack.ll
index c8302df78f68..2e8bb1d60934 100644
--- a/test/CodeGen/Thumb2/thumb2-pack.ll
+++ b/test/CodeGen/Thumb2/thumb2-pack.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \
-; RUN: grep pkhbt | count 5
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | \
-; RUN: grep pkhtb | count 4
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; CHECK: test1
+; CHECK: pkhbt r0, r0, r1, lsl #16
define i32 @test1(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp4 = shl i32 %Y, 16 ; <i32> [#uses=1]
@@ -10,6 +9,8 @@ define i32 @test1(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test1a
+; CHECK: pkhbt r0, r0, r1, lsl #16
define i32 @test1a(i32 %X, i32 %Y) {
%tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp37 = shl i32 %Y, 16 ; <i32> [#uses=1]
@@ -17,6 +18,8 @@ define i32 @test1a(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test2
+; CHECK: pkhbt r0, r0, r1, lsl #12
define i32 @test2(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp3 = shl i32 %Y, 12 ; <i32> [#uses=1]
@@ -25,6 +28,8 @@ define i32 @test2(i32 %X, i32 %Y) {
ret i32 %tmp57
}
+; CHECK: test3
+; CHECK: pkhbt r0, r0, r1, lsl #18
define i32 @test3(i32 %X, i32 %Y) {
%tmp19 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp37 = shl i32 %Y, 18 ; <i32> [#uses=1]
@@ -32,6 +37,8 @@ define i32 @test3(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test4
+; CHECK: pkhbt r0, r0, r1
define i32 @test4(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, 65535 ; <i32> [#uses=1]
%tmp3 = and i32 %Y, -65536 ; <i32> [#uses=1]
@@ -39,6 +46,8 @@ define i32 @test4(i32 %X, i32 %Y) {
ret i32 %tmp46
}
+; CHECK: test5
+; CHECK: pkhtb r0, r0, r1, asr #16
define i32 @test5(i32 %X, i32 %Y) {
%tmp17 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp2 = bitcast i32 %Y to i32 ; <i32> [#uses=1]
@@ -47,6 +56,8 @@ define i32 @test5(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test5a
+; CHECK: pkhtb r0, r0, r1, asr #16
define i32 @test5a(i32 %X, i32 %Y) {
%tmp110 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp37 = lshr i32 %Y, 16 ; <i32> [#uses=1]
@@ -55,6 +66,8 @@ define i32 @test5a(i32 %X, i32 %Y) {
ret i32 %tmp5
}
+; CHECK: test6
+; CHECK: pkhtb r0, r0, r1, asr #12
define i32 @test6(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp37 = lshr i32 %Y, 12 ; <i32> [#uses=1]
@@ -64,6 +77,8 @@ define i32 @test6(i32 %X, i32 %Y) {
ret i32 %tmp59
}
+; CHECK: test7
+; CHECK: pkhtb r0, r0, r1, asr #18
define i32 @test7(i32 %X, i32 %Y) {
%tmp1 = and i32 %X, -65536 ; <i32> [#uses=1]
%tmp3 = ashr i32 %Y, 18 ; <i32> [#uses=1]
@@ -71,3 +86,12 @@ define i32 @test7(i32 %X, i32 %Y) {
%tmp57 = or i32 %tmp4, %tmp1 ; <i32> [#uses=1]
ret i32 %tmp57
}
+
+; CHECK: test8
+; CHECK: pkhtb r0, r0, r1, asr #22
+define i32 @test8(i32 %X, i32 %Y) {
+ %tmp1 = and i32 %X, -65536
+ %tmp3 = lshr i32 %Y, 22
+ %tmp57 = or i32 %tmp3, %tmp1
+ ret i32 %tmp57
+}
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 3946371709d5..4f92c9333806 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -7,7 +7,7 @@
%quux = type { i32 (...)**, %baz*, i32 }
%quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo }
-declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
@@ -15,11 +15,11 @@ define void @aaa(%quuz* %this, i8* %block) {
; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
entry:
- %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4
- %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
store float 0.000000e+00, float* undef, align 4
- %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
+ %2 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind ; <<4 x float>> [#uses=1]
%val173 = load <4 x float>* undef ; <<4 x float>> [#uses=1]
br label %bb4
diff --git a/test/CodeGen/Thumb2/thumb2-uxtb.ll b/test/CodeGen/Thumb2/thumb2-uxtb.ll
index 1fa4e5c21dab..2074f98cb608 100644
--- a/test/CodeGen/Thumb2/thumb2-uxtb.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxtb.ll
@@ -6,7 +6,7 @@ define i32 @test1(i32 %x) {
; ARMv7A: uxtb16 r0, r0
; ARMv7M: test1
-; ARMv7M: and r0, r0, #16711935
+; ARMv7M: bic r0, r0, #-16711936
%tmp1 = and i32 %x, 16711935 ; <i32> [#uses=1]
ret i32 %tmp1
}
diff --git a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
index 2d7bd27d24bd..35b0159d39c6 100644
--- a/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
+++ b/test/CodeGen/X86/2006-05-22-FPSetEQ.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 | grep setnp
-; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-finite-only-fp-math | \
+; RUN: llc < %s -march=x86 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
; RUN: not grep setnp
define i32 @test(float %f) {
diff --git a/test/CodeGen/X86/2007-06-14-branchfold.ll b/test/CodeGen/X86/2007-06-14-branchfold.ll
deleted file mode 100644
index 2680b1543fbb..000000000000
--- a/test/CodeGen/X86/2007-06-14-branchfold.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=i686 | not grep jmp
-; check that branch folding understands FP_REG_KILL is not a branch
-
-target triple = "i686-pc-linux-gnu"
- %struct.FRAME.c34003a = type { float, float }
-@report_E = global i8 0 ; <i8*> [#uses=0]
-
-define void @main() {
-entry:
- %FRAME.31 = alloca %struct.FRAME.c34003a, align 8 ; <%struct.FRAME.c34003a*> [#uses=4]
- %tmp20 = call i32 @report__ident_int( i32 -50 ) ; <i32> [#uses=1]
- %tmp2021 = sitofp i32 %tmp20 to float ; <float> [#uses=5]
- %tmp23 = fcmp ult float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp26 = fcmp ugt float %tmp2021, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp23, %tmp26 ; <i1> [#uses=1]
- br i1 %bothcond, label %bb, label %bb30
-
-bb: ; preds = %entry
- unwind
-
-bb30: ; preds = %entry
- %tmp35 = call i32 @report__ident_int( i32 50 ) ; <i32> [#uses=1]
- %tmp3536 = sitofp i32 %tmp35 to float ; <float> [#uses=4]
- %tmp38 = fcmp ult float %tmp3536, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp44 = fcmp ugt float %tmp3536, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond226 = or i1 %tmp38, %tmp44 ; <i1> [#uses=1]
- br i1 %bothcond226, label %bb47, label %bb49
-
-bb47: ; preds = %bb30
- unwind
-
-bb49: ; preds = %bb30
- %tmp60 = fcmp ult float %tmp3536, %tmp2021 ; <i1> [#uses=1]
- %tmp60.not = xor i1 %tmp60, true ; <i1> [#uses=1]
- %tmp65 = fcmp olt float %tmp2021, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond227 = and i1 %tmp65, %tmp60.not ; <i1> [#uses=1]
- br i1 %bothcond227, label %cond_true68, label %cond_next70
-
-cond_true68: ; preds = %bb49
- unwind
-
-cond_next70: ; preds = %bb49
- %tmp71 = call i32 @report__ident_int( i32 -30 ) ; <i32> [#uses=1]
- %tmp7172 = sitofp i32 %tmp71 to float ; <float> [#uses=3]
- %tmp74 = fcmp ult float %tmp7172, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp80 = fcmp ugt float %tmp7172, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond228 = or i1 %tmp74, %tmp80 ; <i1> [#uses=1]
- br i1 %bothcond228, label %bb83, label %bb85
-
-bb83: ; preds = %cond_next70
- unwind
-
-bb85: ; preds = %cond_next70
- %tmp90 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 1 ; <float*> [#uses=3]
- store float %tmp7172, float* %tmp90
- %tmp92 = call i32 @report__ident_int( i32 30 ) ; <i32> [#uses=1]
- %tmp9293 = sitofp i32 %tmp92 to float ; <float> [#uses=7]
- %tmp95 = fcmp ult float %tmp9293, 0xC7EFFFFFE0000000 ; <i1> [#uses=1]
- %tmp101 = fcmp ugt float %tmp9293, 0x47EFFFFFE0000000 ; <i1> [#uses=1]
- %bothcond229 = or i1 %tmp95, %tmp101 ; <i1> [#uses=1]
- br i1 %bothcond229, label %bb104, label %bb106
-
-bb104: ; preds = %bb85
- unwind
-
-bb106: ; preds = %bb85
- %tmp111 = getelementptr %struct.FRAME.c34003a* %FRAME.31, i32 0, i32 0 ; <float*> [#uses=2]
- store float %tmp9293, float* %tmp111
- %tmp123 = load float* %tmp90 ; <float> [#uses=4]
- %tmp125 = fcmp ult float %tmp9293, %tmp123 ; <i1> [#uses=1]
- br i1 %tmp125, label %cond_next147, label %cond_true128
-
-cond_true128: ; preds = %bb106
- %tmp133 = fcmp olt float %tmp123, %tmp2021 ; <i1> [#uses=1]
- %tmp142 = fcmp ogt float %tmp9293, %tmp3536 ; <i1> [#uses=1]
- %bothcond230 = or i1 %tmp133, %tmp142 ; <i1> [#uses=1]
- br i1 %bothcond230, label %bb145, label %cond_next147
-
-bb145: ; preds = %cond_true128
- unwind
-
-cond_next147: ; preds = %cond_true128, %bb106
- %tmp157 = fcmp ugt float %tmp123, -3.000000e+01 ; <i1> [#uses=1]
- %tmp165 = fcmp ult float %tmp9293, -3.000000e+01 ; <i1> [#uses=1]
- %bothcond231 = or i1 %tmp157, %tmp165 ; <i1> [#uses=1]
- br i1 %bothcond231, label %bb168, label %bb169
-
-bb168: ; preds = %cond_next147
- unwind
-
-bb169: ; preds = %cond_next147
- %tmp176 = fcmp ugt float %tmp123, 3.000000e+01 ; <i1> [#uses=1]
- %tmp184 = fcmp ult float %tmp9293, 3.000000e+01 ; <i1> [#uses=1]
- %bothcond232 = or i1 %tmp176, %tmp184 ; <i1> [#uses=1]
- br i1 %bothcond232, label %bb187, label %bb188
-
-bb187: ; preds = %bb169
- unwind
-
-bb188: ; preds = %bb169
- %tmp192 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 3.000000e+01 ) ; <float> [#uses=2]
- %tmp194 = load float* %tmp90 ; <float> [#uses=1]
- %tmp196 = fcmp ugt float %tmp194, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %tmp196, label %bb207, label %cond_next200
-
-cond_next200: ; preds = %bb188
- %tmp202 = load float* %tmp111 ; <float> [#uses=1]
- %tmp204 = fcmp ult float %tmp202, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %tmp204, label %bb207, label %bb208
-
-bb207: ; preds = %cond_next200, %bb188
- unwind
-
-bb208: ; preds = %cond_next200
- %tmp212 = call fastcc float @c34003a__ident.154( %struct.FRAME.c34003a* %FRAME.31, float 0.000000e+00 ) ; <float> [#uses=1]
- %tmp214 = fcmp oge float %tmp212, %tmp192 ; <i1> [#uses=1]
- %tmp217 = fcmp oge float %tmp192, 1.000000e+02 ; <i1> [#uses=1]
- %tmp221 = or i1 %tmp214, %tmp217 ; <i1> [#uses=1]
- br i1 %tmp221, label %cond_true224, label %UnifiedReturnBlock
-
-cond_true224: ; preds = %bb208
- call void @abort( ) noreturn
- ret void
-
-UnifiedReturnBlock: ; preds = %bb208
- ret void
-}
-
-declare fastcc float @c34003a__ident.154(%struct.FRAME.c34003a* %CHAIN.32, float %x)
-
-declare i32 @report__ident_int(i32 %x)
-
-declare void @abort() noreturn
diff --git a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll b/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
deleted file mode 100644
index b936686798f0..000000000000
--- a/test/CodeGen/X86/2008-01-25-EmptyFunction.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=x86 | grep nop
-target triple = "i686-apple-darwin8"
-
-
-define void @bork() noreturn nounwind {
-entry:
- unreachable
-}
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll b/test/CodeGen/X86/2008-08-06-CmpStride.ll
index 99cb8569b3f4..99cb8569b3f4 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-06-CmpStride.ll
+++ b/test/CodeGen/X86/2008-08-06-CmpStride.ll
diff --git a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
index 36cc53545103..36cc53545103 100644
--- a/test/Transforms/LoopStrengthReduce/2009-02-09-ivs-different-sizes.ll
+++ b/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 4a97ac35afc7..bb01e5afceff 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {7 machine-licm}
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -stats |& grep {6 machine-licm}
; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 | FileCheck %s
; rdar://6627786
; rdar://7792037
diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
index 8d426271a194..28539307aa40 100644
--- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
+++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
; Check the register copy comes after the call to f and before the call to g
; PR3784
diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
index da493d4910e1..b13d33eb3fd9 100644
--- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
+++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -asm-verbose | FileCheck %s
; Check that register copies in the landing pad come after the EH_LABEL
declare i32 @f()
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index b5873bae5f05..90dabb8ab635 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -stats -info-output-file - > %t
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t
; RUN: not grep spill %t
; RUN: not grep {%rsp} %t
; RUN: not grep {%rbp} %t
diff --git a/test/DebugInfo/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index 001f853dd236..85ee091c3478 100644
--- a/test/DebugInfo/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -1,7 +1,4 @@
-; RUN: llc -O0 < %s | FileCheck %s
-; ModuleID = 'try.c'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin9.8"
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
; Currently, dbg.declare generates a DEBUG_VALUE comment. Eventually it will
; generate DWARF and this test will need to be modified or removed.
diff --git a/test/DebugInfo/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
index 70103e5f72bd..2113263c0ac3 100644
--- a/test/DebugInfo/2010-02-01-DbgValueCrash.ll
+++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -1,6 +1,5 @@
; RUN: llc -O1 < %s
; ModuleID = 'pr6157.bc'
-target triple = "x86_64-unknown-linux-gnu"
; formerly crashed in SelectionDAGBuilder
%tart.reflect.ComplexType = type { double, double }
diff --git a/test/DebugInfo/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 52e948428952..d2115496f8f4 100644
--- a/test/DebugInfo/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O2 < %s -mtriple=x86_64-apple-darwin | grep debug_loc12
+; RUN: llc -march=x86-64 -O2 < %s | grep debug_loc12
; Test to check .debug_loc support. This test case emits 13 debug_loc entries.
%0 = type { double }
diff --git a/test/DebugInfo/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 80643d0792ac..80643d0792ac 100644
--- a/test/DebugInfo/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
diff --git a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index 812d3720d6f5..812d3720d6f5 100644
--- a/test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
diff --git a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
new file mode 100644
index 000000000000..be7d94c4f291
--- /dev/null
+++ b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mcpu=i486
+; PR7375
+;
+; This function contains a block (while.cond) with a lonely RFP use that is
+; not a kill. We still need an FP_REG_KILL for that block since the register
+; allocator will insert a reload.
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 {
+entry:
+ %tmp2 = load double* %t ; <double> [#uses=1]
+ br i1 undef, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ br i1 undef, label %if.end, label %bb.nph
+
+while.cond: ; preds = %bb.nph, %while.cond
+ store double %tmp2, double* undef
+ br i1 undef, label %if.end, label %while.cond
+
+bb.nph: ; preds = %if.then
+ br label %while.cond
+
+if.end: ; preds = %while.cond, %if.then, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-07-15-Crash.ll b/test/CodeGen/X86/2010-07-15-Crash.ll
new file mode 100644
index 000000000000..3ac4cf5964c3
--- /dev/null
+++ b/test/CodeGen/X86/2010-07-15-Crash.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s -o /dev/null
+; PR7653
+
+@__FUNCTION__.1623 = external constant [4 x i8] ; <[4 x i8]*> [#uses=1]
+
+define void @foo() nounwind {
+entry:
+ tail call void asm sideeffect "", "s,i,~{fpsr},~{flags}"(i8* getelementptr
+inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0), i8* getelementptr
+inbounds ([4 x i8]* @__FUNCTION__.1623, i64 0, i64 0)) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/2010-07-29-SetccSimplify.ll b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
new file mode 100644
index 000000000000..96016cfe1c73
--- /dev/null
+++ b/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define i32 @extend2bit_v2(i32 %val) {
+entry:
+ %0 = trunc i32 %val to i2 ; <i2> [#uses=1]
+ %1 = sext i2 %0 to i32 ; <i32> [#uses=1]
+ %2 = icmp eq i32 %1, 3 ; <i1> [#uses=1]
+ %3 = zext i1 %2 to i32 ; <i32> [#uses=1]
+ ret i32 %3
+}
+
+; CHECK: extend2bit_v2:
+; CHECK: xorl %eax, %eax
+; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
new file mode 100644
index 000000000000..1919d2ef34ae
--- /dev/null
+++ b/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR7814
+
+@g_16 = global i64 -3738643449681751625, align 8 ; <i64*> [#uses=1]
+@g_38 = global i32 0, align 4 ; <i32*> [#uses=2]
+@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ %tmp = load i64* @g_16 ; <i64> [#uses=1]
+ %not.lnot = icmp ne i64 %tmp, 0 ; <i1> [#uses=1]
+ %conv = sext i1 %not.lnot to i64 ; <i64> [#uses=1]
+ %and = and i64 %conv, 150 ; <i64> [#uses=1]
+ %conv.i = trunc i64 %and to i8 ; <i8> [#uses=1]
+ %cmp = icmp sgt i8 %conv.i, 0 ; <i1> [#uses=1]
+ br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
+
+; CHECK: andl $150
+; CHECK-NEXT: testb
+; CHECK-NEXT: jg
+
+entry.if.end_crit_edge: ; preds = %entry
+ %tmp4.pre = load i32* @g_38 ; <i32> [#uses=1]
+ br label %if.end
+
+if.then: ; preds = %entry
+ store i32 1, i32* @g_38
+ br label %if.end
+
+if.end: ; preds = %entry.if.end_crit_edge, %if.then
+ %tmp4 = phi i32 [ %tmp4.pre, %entry.if.end_crit_edge ], [ 1, %if.then ] ; <i32> [#uses=1]
+ %call5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp4) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
new file mode 100644
index 000000000000..98a0887c0e69
--- /dev/null
+++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=i386-pc-mingw32
+
+define void @func() nounwind {
+invoke.cont:
+ %call = tail call i8* @malloc()
+ %a = invoke i32 @bar()
+ to label %bb1 unwind label %lpad
+
+bb1:
+ ret void
+
+lpad:
+ %exn = tail call i8* @llvm.eh.exception() nounwind
+ %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind
+ %ehspec.fails = icmp slt i32 %eh.selector, 0
+ br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup
+
+cleanup:
+ tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind
+ unreachable
+
+ehspec.unexpected:
+ tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind
+ unreachable
+}
+
+declare noalias i8* @malloc()
+
+declare i8* @llvm.eh.exception() nounwind readonly
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
+
+declare void @_Unwind_Resume_or_Rethrow(i8*)
+
+declare void @__cxa_call_unexpected(i8*)
+
+declare i32 @bar()
diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
new file mode 100644
index 000000000000..d98ef14e108b
--- /dev/null
+++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+; CHECK: DW_TAG_constant
+; CHECK-NEXT: ascii "ro" #{{#?}} DW_AT_name
+
+define void @foo() nounwind ssp {
+entry:
+ call void @bar(i32 201), !dbg !8
+ ret void, !dbg !8
+}
+
+declare void @bar(i32)
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.gv = !{!5}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D", metadata !"clang 2.8", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{null}
+!5 = metadata !{i32 524327, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201} ; [ DW_TAG_constant ]
+!6 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ]
+!7 = metadata !{i32 524324, metadata !1, metadata !"unsigned int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 3, i32 14, metadata !9, null}
+!9 = metadata !{i32 524299, metadata !0, i32 3, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
new file mode 100644
index 000000000000..e5542baf2ee8
--- /dev/null
+++ b/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; This test exercises the alias checking in SimpleRegisterCoalescing::RemoveCopyByCommutingDef.
+
+define void @f(i32* %w, i32* %h, i8* %_this, i8* %image) nounwind ssp {
+ %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3]
+ %tmp1 = trunc i64 %x1 to i32 ; <i32> [#uses=1]
+; CHECK: movl (%r{{.*}}), %
+ %x4 = load i32* %h, align 4 ; <i32> [#uses=1]
+
+; The imull clobbers a 32-bit register.
+; CHECK: imull %{{...}}, %e[[CLOBBER:..]]
+ %x5 = mul nsw i32 %x4, %tmp1 ; <i32> [#uses=1]
+
+; So we cannot use the corresponding 64-bit register anymore.
+; CHECK-NOT: shrq $32, %r[[CLOBBER]]
+ %btmp3 = lshr i64 %x1, 32 ; <i64> [#uses=1]
+ %btmp4 = trunc i64 %btmp3 to i32 ; <i32> [#uses=1]
+
+; CHECK: idiv
+ %x6 = sdiv i32 %x5, %btmp4 ; <i32> [#uses=1]
+ store i32 %x6, i32* %w, align 4
+ ret void
+}
+
+declare i64 @g(i8*, i8*)
diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp
index f2005891a59a..629a14773615 100644
--- a/test/CodeGen/X86/GC/dg.exp
+++ b/test/CodeGen/X86/GC/dg.exp
@@ -1,3 +1,5 @@
load_lib llvm.exp
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/X86/MachineSink-PHIUse.ll b/test/CodeGen/X86/MachineSink-PHIUse.ll
new file mode 100644
index 000000000000..728e37736018
--- /dev/null
+++ b/test/CodeGen/X86/MachineSink-PHIUse.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=x86_64-appel-darwin -stats |& grep {machine-sink}
+
+define fastcc void @t() nounwind ssp {
+entry:
+ br i1 undef, label %bb, label %bb4
+
+bb: ; preds = %entry
+ br i1 undef, label %return, label %bb3
+
+bb3: ; preds = %bb
+ unreachable
+
+bb4: ; preds = %entry
+ br i1 undef, label %bb.nph, label %return
+
+bb.nph: ; preds = %bb4
+ br label %bb5
+
+bb5: ; preds = %bb9, %bb.nph
+ %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1]
+ %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2]
+ %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0]
+ br i1 undef, label %bb9, label %bb6
+
+bb6: ; preds = %bb5
+ br i1 undef, label %bb9, label %bb7
+
+bb7: ; preds = %bb6
+ br i1 undef, label %bb9, label %bb8
+
+bb8: ; preds = %bb7
+ unreachable
+
+bb9: ; preds = %bb7, %bb6, %bb5
+ br i1 undef, label %bb5, label %return
+
+return: ; preds = %bb9, %bb4, %bb
+ ret void
+}
diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll
new file mode 100644
index 000000000000..a72160be719a
--- /dev/null
+++ b/test/CodeGen/X86/avx-128.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+@z = common global <4 x float> zeroinitializer, align 16
+
+define void @zero() nounwind ssp {
+entry:
+ ; CHECK: vpxor
+ ; CHECK: vmovaps
+ store <4 x float> zeroinitializer, <4 x float>* @z, align 16
+ ret void
+}
+
diff --git a/test/CodeGen/X86/avx-256.ll b/test/CodeGen/X86/avx-256.ll
new file mode 100644
index 000000000000..20d31e738857
--- /dev/null
+++ b/test/CodeGen/X86/avx-256.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+@x = common global <8 x float> zeroinitializer, align 32
+@y = common global <4 x double> zeroinitializer, align 32
+
+define void @zero() nounwind ssp {
+entry:
+ ; CHECK: vxorps
+ ; CHECK: vmovaps
+ ; CHECK: vmovaps
+ store <8 x float> zeroinitializer, <8 x float>* @x, align 32
+ store <4 x double> zeroinitializer, <4 x double>* @y, align 32
+ ret void
+}
+
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
new file mode 100644
index 000000000000..9de90237d146
--- /dev/null
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -0,0 +1,2587 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+define <2 x i64> @test_x86_aesni_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesdec
+ %res = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesdeclast
+ %res = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesenc
+ %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vaesenclast
+ %res = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aesimc(<2 x i64> %a0) {
+ ; CHECK: vaesimc
+ %res = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone
+
+
+define <2 x i64> @test_x86_aesni_aeskeygenassist(<2 x i64> %a0) {
+ ; CHECK: vaeskeygenassist
+ %res = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vaddsd
+ %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcmpordpd
+ %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcmpordsd
+ %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vcomisd
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
+ ; CHECK: vcvtdq2pd
+ %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
+ ; CHECK: vcvtdq2ps
+ %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) {
+ ; CHECK: vcvtpd2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) {
+ ; CHECK: vcvtpd2ps
+ %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) {
+ ; CHECK: vcvtps2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
+ ; CHECK: vcvtps2pd
+ %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) {
+ ; CHECK: vcvtsd2si
+ %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) {
+ ; CHECK: vcvtsd2ss
+ %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) {
+ ; CHECK: movl
+ ; CHECK: vcvtsi2sd
+ %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
+ ; CHECK: vcvtss2sd
+ %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) {
+ ; CHECK: vcvttpd2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) {
+ ; CHECK: vcvttps2dq
+ %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) {
+ ; CHECK: vcvttss2si
+ %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vdivsd
+ %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovdqu
+ %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovupd
+ %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
+
+
+define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
+ ; CHECK: pushl
+ ; CHECK: movl
+ ; CHECK: vmaskmovdqu
+ ; CHECK: popl
+ call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
+ ret void
+}
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
+
+
+define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vmaxpd
+ %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vmaxsd
+ %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vminpd
+ %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vminsd
+ %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
+ ; CHECK: vmovmskpd
+ %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
+
+
+define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovntdq
+ call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
+
+
+define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovntpd
+ call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
+
+
+define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vmulsd
+ %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpackssdw
+ %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpacksswb
+ %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpackuswb
+ %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpaddsb
+ %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpaddsw
+ %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpaddusb
+ %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpaddusw
+ %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpavgb
+ %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpavgw
+ %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpeqb
+ %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcmpeqd
+ %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcmpeqw
+ %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpgtb
+ %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcmpgtd
+ %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcmpgtw
+ %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaddwd
+ %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaxsw
+ %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpmaxub
+ %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpminsw
+ %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpminub
+ %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
+ ; CHECK: vpmovmskb
+ %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmulhw
+ %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmulhuw
+ %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmuludq
+ %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsadbw
+ %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpslld
+ %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
+ ; CHECK: vpslldq
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
+ ; CHECK: vpslldq
+ %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsllq
+ %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsllw
+ %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) {
+ ; CHECK: vpslld
+ %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) {
+ ; CHECK: vpsllq
+ %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) {
+ ; CHECK: vpsllw
+ %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsrad
+ %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsraw
+ %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) {
+ ; CHECK: vpsrad
+ %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) {
+ ; CHECK: vpsraw
+ %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsrld
+ %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
+ ; CHECK: vpsrldq
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
+ ; CHECK: vpsrldq
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsrlq
+ %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsrlw
+ %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) {
+ ; CHECK: vpsrld
+ %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) {
+ ; CHECK: vpsrlq
+ %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) {
+ ; CHECK: vpsrlw
+ %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsubsb
+ %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsubsw
+ %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsubusb
+ %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsubusw
+ %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
+ ; CHECK: vsqrtpd
+ %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
+ ; CHECK: vsqrtsd
+ %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+
+define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovq
+ call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
+
+
+define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovdqu
+ call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
+
+
+define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovupd
+ call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+
+define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vsubsd
+ %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vucomisd
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vaddsubpd
+ %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vaddsubps
+ %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vhaddpd
+ %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vhaddps
+ %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vhsubpd
+ %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vhsubps
+ %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vlddqu
+ %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vblendpd
+ %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vblendps
+ %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: vblendvpd
+ %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: vblendvps
+ %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vdppd
+ %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vdpps
+ %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vinsertps
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovntdqa
+ %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
+
+
+define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vmpsadbw
+ %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpackusdw
+ %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ ; CHECK: vpblendvb
+ %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpblendw
+ %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcmpeqq
+ %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
+ ; CHECK: vphminposuw
+ %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpmaxsb
+ %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmaxsd
+ %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmaxud
+ %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaxuw
+ %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpminsb
+ %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpminsd
+ %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpminud
+ %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpminuw
+ %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbw
+ %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
+ ; CHECK: vpmovsxdq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
+ ; CHECK: vpmovsxwd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
+ ; CHECK: vpmovsxwq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbw
+ %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
+ ; CHECK: vpmovzxdq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
+ ; CHECK: vpmovzxwd
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
+ ; CHECK: vpmovzxwq
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpmuldq
+ %res = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vptest
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vptest
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vptest
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
+ ; CHECK: vroundpd
+ %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
+ ; CHECK: vroundps
+ %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vroundsd
+ %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vroundss
+ %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestri128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestria128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
+ ; CHECK: movl
+ ; CHECK: movl
+ ; CHECK: vpcmpestrm
+ %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcmpgtq
+ %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistri
+ ; CHECK: movl
+ %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcmpistrm
+ %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vaddss
+ %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcmpordps
+ %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcmpordss
+ %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
+
+
+define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vcomiss
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) {
+ ; CHECK: movl
+ ; CHECK: vcvtsi2ss
+ %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone
+
+
+define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) {
+ ; CHECK: vcvtss2si
+ %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) {
+ ; CHECK: vcvttss2si
+ %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vdivss
+ %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_ldmxcsr(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vldmxcsr
+ call void @llvm.x86.sse.ldmxcsr(i8* %a0)
+ ret void
+}
+declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
+
+
+define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovups
+ %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
+
+
+define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vmaxps
+ %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vmaxss
+ %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vminps
+ %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vminss
+ %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
+ ; CHECK: vmovmskps
+ %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovntps
+ call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind
+
+
+define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vmulss
+ %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) {
+ ; CHECK: vrcpps
+ %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) {
+ ; CHECK: vrcpss
+ %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) {
+ ; CHECK: vrsqrtps
+ %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) {
+ ; CHECK: vrsqrtss
+ %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) {
+ ; CHECK: vsqrtps
+ %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) {
+ ; CHECK: vsqrtss
+ %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+
+define void @test_x86_sse_stmxcsr(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vstmxcsr
+ call void @llvm.x86.sse.stmxcsr(i8* %a0)
+ ret void
+}
+declare void @llvm.x86.sse.stmxcsr(i8*) nounwind
+
+
+define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
+ ; CHECK: movl
+ ; CHECK: vmovups
+ call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+
+define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vsubss
+ %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setae
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setbe
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vucomiss
+ ; CHECK: setne
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) {
+ ; CHECK: vpabsb
+ %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) {
+ ; CHECK: vpabsd
+ %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) {
+ ; CHECK: vpabsw
+ %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vphaddd
+ %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vphaddsw
+ %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vphaddw
+ %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vphsubd
+ %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vphsubsw
+ %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vphsubw
+ %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmaddubsw
+ %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpmulhrsw
+ %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpshufb
+ %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpsignb
+ %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
+
+
+define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsignd
+ %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsignw
+ %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_addsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vaddsubpd
+ %res = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_addsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vaddsubps
+ %res = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vblendpd
+ %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vblendps
+ %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+ ; CHECK: vblendvpd
+ %res = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: vblendvps
+ %res = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cmp_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vcmpordpd
+ %res = call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vcmpordps
+ %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) {
+ ; CHECK: vcvtpd2psy
+ %res = call <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.cvt.pd2.ps.256(<4 x double>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) {
+ ; CHECK: vcvtpd2dqy
+ %res = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
+ ; CHECK: vcvtps2pd
+ %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) {
+ ; CHECK: vcvtps2dq
+ %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
+ ; CHECK: vcvtdq2pd
+ %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
+ ; CHECK: vcvtdq2ps
+ %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) {
+ ; CHECK: vcvttpd2dqy
+ %res = call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) {
+ ; CHECK: vcvttps2dq
+ %res = call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vdpps
+ %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vhaddpd
+ %res = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_hadd_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vhaddps
+ %res = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_hsub_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vhsubpd
+ %res = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_hsub_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vhsubps
+ %res = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
+ ; CHECK: vlddqu
+ %res = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
+
+
+define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
+ ; CHECK: vmovdqu
+ %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
+
+
+define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
+ ; CHECK: vmovupd
+ %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
+ ; CHECK: vmovups
+ %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: vmaskmovpd
+ %res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x double>) nounwind readonly
+
+
+define <4 x double> @test_x86_avx_maskload_pd_256(i8* %a0, <4 x double> %a1) {
+ ; CHECK: vmaskmovpd
+ %res = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx_maskload_ps(i8* %a0, <4 x float> %a1) {
+ ; CHECK: vmaskmovps
+ %res = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_maskload_ps_256(i8* %a0, <8 x float> %a1) {
+ ; CHECK: vmaskmovps
+ %res = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+
+
+define void @test_x86_avx_maskstore_pd(i8* %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: vmaskmovpd
+ call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x double> %a1, <2 x double> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x double>, <2 x double>) nounwind
+
+
+define void @test_x86_avx_maskstore_pd_256(i8* %a0, <4 x double> %a1, <4 x double> %a2) {
+ ; CHECK: vmaskmovpd
+ call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x double> %a1, <4 x double> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x double>, <4 x double>) nounwind
+
+
+define void @test_x86_avx_maskstore_ps(i8* %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: vmaskmovps
+ call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x float> %a1, <4 x float> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x float>, <4 x float>) nounwind
+
+
+define void @test_x86_avx_maskstore_ps_256(i8* %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: vmaskmovps
+ call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x float> %a1, <8 x float> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+
+
+define <4 x double> @test_x86_avx_max_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vmaxpd
+ %res = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_max_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vmaxps
+ %res = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_min_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vminpd
+ %res = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_min_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vminps
+ %res = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) {
+ ; CHECK: vmovmskpd
+ %res = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
+ ; CHECK: vmovmskps
+ %res = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+
+
+define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
+ ; CHECK: vmovntdq
+ call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
+
+
+define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
+ ; CHECK: vmovntpd
+ call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
+
+
+define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
+ ; CHECK: vmovntps
+ call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
+
+
+define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vptest
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vptest
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vptest
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.ptestz.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_rcp_ps_256(<8 x float> %a0) {
+ ; CHECK: vrcpps
+ %res = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) {
+ ; CHECK: vroundpd
+ %res = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) {
+ ; CHECK: vroundps
+ %res = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_rsqrt_ps_256(<8 x float> %a0) {
+ ; CHECK: vrsqrtps
+ %res = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
+ ; CHECK: vsqrtpd
+ %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
+ ; CHECK: vsqrtps
+ %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
+
+
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+ ; CHECK: vmovdqu
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
+
+define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
+ ; CHECK: vmovupd
+ call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
+
+
+define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
+ ; CHECK: vmovups
+ call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
+
+
+define <4 x double> @test_x86_avx_vbroadcast_sd_256(i8* %a0) {
+ ; CHECK: vbroadcastsd
+ %res = call <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vbroadcast.sd.256(i8*) nounwind readonly
+
+
+define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
+ ; CHECK: vbroadcastf128
+ %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
+ ; CHECK: vbroadcastf128
+ %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
+
+
+define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
+ ; CHECK: vbroadcastss
+ %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
+
+
+define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
+ ; CHECK: vbroadcastss
+ %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
+
+
+define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
+ ; CHECK: vextractf128
+ %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_vextractf128_ps_256(<8 x float> %a0) {
+ ; CHECK: vextractf128
+ %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx_vextractf128_si_256(<8 x i32> %a0) {
+ ; CHECK: vextractf128
+ %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vinsertf128_pd_256(<4 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vinsertf128
+ %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vinsertf128_ps_256(<8 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vinsertf128
+ %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_vinsertf128_si_256(<8 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vinsertf128
+ %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vperm2f128
+ %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vperm2f128
+ %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vperm2f128
+ %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
+ ; CHECK: vpermilpd
+ %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
+ ; CHECK: vpermilpd
+ %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
+ ; CHECK: vpermilps
+ %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
+ ; CHECK: vpermilps
+ %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
+
+
+define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) {
+ ; CHECK: vpermilpd
+ %res = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
+
+
+define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
+ ; CHECK: vpermilpd
+ %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
+
+
+define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
+ ; CHECK: vpermilps
+ %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
+ ; CHECK: vpermilps
+ %res = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestc_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: setb
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: seta
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) {
+ ; CHECK: vtestpd
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone
+
+
+define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vtestps
+ ; CHECK: sete
+ ; CHECK: movzbl
+ %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readnone
+
+
+define void @test_x86_avx_vzeroall() {
+ ; CHECK: vzeroall
+ call void @llvm.x86.avx.vzeroall()
+ ret void
+}
+declare void @llvm.x86.avx.vzeroall() nounwind
+
+
+define void @test_x86_avx_vzeroupper() {
+ ; CHECK: vzeroupper
+ call void @llvm.x86.avx.vzeroupper()
+ ret void
+}
+declare void @llvm.x86.avx.vzeroupper() nounwind
+
+
diff --git a/test/CodeGen/X86/avx-intrinsics-x86_64.ll b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
new file mode 100644
index 000000000000..b1867105ce85
--- /dev/null
+++ b/test/CodeGen/X86/avx-intrinsics-x86_64.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=corei7 -mattr=avx | FileCheck %s
+
+define i64 @test_x86_sse2_cvtsd2si64(<2 x double> %a0) {
+ ; CHECK: vcvtsd2si
+ %res = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
+
+
+define <2 x double> @test_x86_sse2_cvtsi642sd(<2 x double> %a0, i64 %a1) {
+ ; CHECK: vcvtsi2sd
+ %res = call <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double> %a0, i64 %a1) ; <<2 x double>> [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse2.cvtsi642sd(<2 x double>, i64) nounwind readnone
+
+
+define i64 @test_x86_sse2_cvttsd2si64(<2 x double> %a0) {
+ ; CHECK: vcvttss2si
+ %res = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+
+define i64 @test_x86_sse_cvtss2si64(<4 x float> %a0) {
+ ; CHECK: vcvtss2si
+ %res = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
+
+
+define <4 x float> @test_x86_sse_cvtsi642ss(<4 x float> %a0, i64 %a1) {
+ ; CHECK: vcvtsi2ss
+ %res = call <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float> %a0, i64 %a1) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse.cvtsi642ss(<4 x float>, i64) nounwind readnone
+
+
+define i64 @test_x86_sse_cvttss2si64(<4 x float> %a0) {
+ ; CHECK: vcvttss2si
+ %res = call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %a0) ; <i64> [#uses=1]
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>) nounwind readnone
+
+
diff --git a/test/CodeGen/X86/barrier-sse.ll b/test/CodeGen/X86/barrier-sse.ll
new file mode 100644
index 000000000000..6190c3684ed6
--- /dev/null
+++ b/test/CodeGen/X86/barrier-sse.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep sfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep mfence
+; RUN: llc < %s -march=x86 -mattr=+sse2 | grep MEMBARRIER
+
+
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 false, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 true, i1 false)
+ call void @llvm.memory.barrier( i1 true, i1 false, i1 true, i1 true, i1 false)
+
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 true, i1 true , i1 false)
+ call void @llvm.memory.barrier( i1 false, i1 false, i1 false, i1 false , i1 false)
+ ret void
+}
diff --git a/test/CodeGen/X86/barrier.ll b/test/CodeGen/X86/barrier.ll
new file mode 100644
index 000000000000..fad6ef690c2f
--- /dev/null
+++ b/test/CodeGen/X86/barrier.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=x86 -mattr=-sse2 | grep lock
+declare void @llvm.memory.barrier( i1 , i1 , i1 , i1 , i1)
+
+define void @test() {
+ call void @llvm.memory.barrier( i1 true, i1 true, i1 false, i1 false, i1 false)
+ ret void
+} \ No newline at end of file
diff --git a/test/CodeGen/X86/call-imm.ll b/test/CodeGen/X86/call-imm.ll
index 255adfbb2bb4..3857fb157905 100644
--- a/test/CodeGen/X86/call-imm.ll
+++ b/test/CodeGen/X86/call-imm.ll
@@ -5,7 +5,7 @@
; Call to immediate is not safe on x86-64 unless we *know* that the
; call will be within 32-bits pcrel from the dest immediate.
-; RUN: llc < %s -march=x86-64 | grep {call.*\*%rax}
+; RUN: llc < %s -march=x86-64 | grep {call.*\\*%rax}
; PR3666
; PR3773
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
index 1f7f6ecafafb..1f7f6ecafafb 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-0.ll
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
index cb638092ea1a..cb638092ea1a 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
diff --git a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll
index ae27383895ce..ae27383895ce 100644
--- a/test/Transforms/LoopStrengthReduce/change-compare-stride-trickiness-2.ll
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-2.ll
diff --git a/test/CodeGen/X86/constant-pool-remat-0.ll b/test/CodeGen/X86/constant-pool-remat-0.ll
index 05388f9b2a96..2a44463e5d32 100644
--- a/test/CodeGen/X86/constant-pool-remat-0.ll
+++ b/test/CodeGen/X86/constant-pool-remat-0.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 | grep LCPI | count 3
-; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep LCPI | count 3
-; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12
declare float @qux(float %y)
diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll
index f29cbf323e37..96fef0fbfc61 100644
--- a/test/CodeGen/X86/critical-edge-split.ll
+++ b/test/CodeGen/X86/critical-edge-split.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 29
+; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29
%CC = type { %Register }
%II = type { %"struct.XX::II::$_74" }
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index bdbaac05f118..bf57e78f35d4 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s | FileCheck %s
; PR2936
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
define dllexport x86_fastcallcc i32 @foo() nounwind {
entry:
diff --git a/test/CodeGen/X86/dyn-stackalloc.ll b/test/CodeGen/X86/dyn-stackalloc.ll
index 1df092018dd8..e577611ebcf1 100644
--- a/test/CodeGen/X86/dyn-stackalloc.ll
+++ b/test/CodeGen/X86/dyn-stackalloc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | not egrep {\\\$4294967289|-7}
-; RUN: llc < %s -march=x86 | egrep {\\\$4294967280|-16}
-; RUN: llc < %s -march=x86-64 | grep {\\-16}
+; RUN: llc < %s -mtriple=i686-linux | not egrep {\\\$4294967289|-7}
+; RUN: llc < %s -mtriple=i686-linux | egrep {\\\$4294967280|-16}
+; RUN: llc < %s -mtriple=x86_64-linux | grep {\\-16}
define void @t() nounwind {
A:
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
new file mode 100644
index 000000000000..b303cd1f7368
--- /dev/null
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+
+define void @func() {
+entry:
+ unreachable
+}
+; CHECK-NO-FP: _func:
+; CHECK-NO-FP-NOT: movq %rsp, %rbp
+; CHECK-NO-FP: nop
+
+; CHECK-FP: _func:
+; CHECK-FP: movq %rsp, %rbp
+; CHECK-FP-NEXT: Ltmp1:
+; CHECK-FP: nop
diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll
index 23b45ebb8d8b..9ded7e05dc46 100644
--- a/test/CodeGen/X86/fabs.ll
+++ b/test/CodeGen/X86/fabs.ll
@@ -1,7 +1,7 @@
; Make sure this testcase codegens to the fabs instruction, not a call to fabsf
; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \
; RUN: count 2
-; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-finite-only-fp-math | \
+; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \
; RUN: grep fabs\$ | count 3
declare float @fabsf(float)
diff --git a/test/CodeGen/X86/fast-isel-atomic.ll b/test/CodeGen/X86/fast-isel-atomic.ll
new file mode 100644
index 000000000000..74c586846d96
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-atomic.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -march=x86-64
+; rdar://8204072
+; PR7652
+
+@sc = external global i8
+@uc = external global i8
+
+declare i8 @llvm.atomic.load.and.i8.p0i8(i8* nocapture, i8) nounwind
+
+define void @test_fetch_and_op() nounwind {
+entry:
+ %tmp40 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @sc, i8 11) ; <i8> [#uses=1]
+ store i8 %tmp40, i8* @sc
+ %tmp41 = call i8 @llvm.atomic.load.and.i8.p0i8(i8* @uc, i8 11) ; <i8> [#uses=1]
+ store i8 %tmp41, i8* @uc
+ ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll
new file mode 100644
index 000000000000..4ab1bc61c7e2
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -0,0 +1,29 @@
+; RUN: llc -O0 -march=x86-64 -asm-verbose=false < %s | FileCheck %s
+; rdar://8337108
+
+; Fast-isel shouldn't try to look through the compare because it's in a
+; different basic block, so its operands aren't necessarily exported
+; for cross-block usage.
+
+; CHECK: movb %al, 7(%rsp)
+; CHECK: callq {{_?}}bar
+; CHECK: movb 7(%rsp), %al
+
+declare void @bar()
+
+define void @foo(i32 %a, i32 %b) nounwind {
+entry:
+ %q = add i32 %a, 7
+ %r = add i32 %b, 9
+ %t = icmp ult i32 %q, %r
+ invoke void @bar() to label %next unwind label %unw
+next:
+ br i1 %t, label %true, label %return
+true:
+ call void @bar()
+ br label %return
+return:
+ ret void
+unw:
+ unreachable
+}
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 1270ab78ab5f..577dd7223a4d 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -9,7 +9,7 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind {
%t15 = load i32* %t9 ; <i32> [#uses=1]
ret i32 %t15
; X32: test1:
-; X32: movl (%ecx,%eax,4), %eax
+; X32: movl (%eax,%ecx,4), %eax
; X32: ret
; X64: test1:
@@ -23,7 +23,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind {
%t15 = load i32* %t9 ; <i32> [#uses=1]
ret i32 %t15
; X32: test2:
-; X32: movl (%eax,%ecx,4), %eax
+; X32: movl (%edx,%ecx,4), %eax
; X32: ret
; X64: test2:
diff --git a/test/CodeGen/X86/fast-isel-shift-imm.ll b/test/CodeGen/X86/fast-isel-shift-imm.ll
index 7759bb056892..5c62c1880516 100644
--- a/test/CodeGen/X86/fast-isel-shift-imm.ll
+++ b/test/CodeGen/X86/fast-isel-shift-imm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %eax}
+; RUN: llc < %s -march=x86 -O0 | grep {sarl \$80, %e}
; PR3242
define void @foo(i32 %x, i32* %p) nounwind {
diff --git a/test/CodeGen/X86/force-align-stack.ll b/test/CodeGen/X86/force-align-stack.ll
new file mode 100644
index 000000000000..ffcbf8a908c8
--- /dev/null
+++ b/test/CodeGen/X86/force-align-stack.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -relocation-model=static -force-align-stack | FileCheck %s
+; Tests to make sure that we always align the stack out to the minimum needed -
+; in this case 16-bytes.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.3"
+
+define void @a() nounwind ssp {
+entry:
+; CHECK: _a:
+; CHECK: andl $-16, %esp
+ %z = alloca <16 x i8> ; <<16 x i8>*> [#uses=2]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ store <16 x i8> zeroinitializer, <16 x i8>* %z, align 16
+ call void @b(<16 x i8>* %z) nounwind
+ br label %return
+
+return: ; preds = %entry
+ ret void
+}
+
+declare void @b(<16 x i8>*)
diff --git a/test/Transforms/LoopStrengthReduce/insert-positions.ll b/test/CodeGen/X86/insert-positions.ll
index 1a695f35e3b0..1a695f35e3b0 100644
--- a/test/Transforms/LoopStrengthReduce/insert-positions.ll
+++ b/test/CodeGen/X86/insert-positions.ll
diff --git a/test/CodeGen/X86/int-intrinsic.ll b/test/CodeGen/X86/int-intrinsic.ll
new file mode 100644
index 000000000000..45a9b0f15c67
--- /dev/null
+++ b/test/CodeGen/X86/int-intrinsic.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+declare void @llvm.x86.int(i8) nounwind
+
+; CHECK: int3
+; CHECK: ret
+define void @primitive_int3 () {
+bb.entry:
+ call void @llvm.x86.int(i8 3) nounwind
+ ret void
+}
+
+; CHECK: int $-128
+; CHECK: ret
+define void @primitive_int128 () {
+bb.entry:
+ call void @llvm.x86.int(i8 128) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/licm-nested.ll b/test/CodeGen/X86/licm-nested.ll
index 71685bb5b83a..b0105ac533bd 100644
--- a/test/CodeGen/X86/licm-nested.ll
+++ b/test/CodeGen/X86/licm-nested.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -stats -info-output-file - | grep machine-licm | grep 2
+; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep machine-licm | grep 3
; MachineLICM should be able to hoist the symbolic addresses out of
; the inner loops.
diff --git a/test/CodeGen/X86/lock-inst-encoding.ll b/test/CodeGen/X86/lock-inst-encoding.ll
new file mode 100644
index 000000000000..03468e2b3f4f
--- /dev/null
+++ b/test/CodeGen/X86/lock-inst-encoding.ll
@@ -0,0 +1,22 @@
+; RUN: llc -O0 --show-mc-encoding < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: f0:
+; CHECK: addq %rax, (%rdi)
+; CHECK: # encoding: [0xf0,0x48,0x01,0x07]
+; CHECK: ret
+define void @f0(i64* %a0) {
+ %t0 = and i64 1, 1
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind
+ %1 = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %a0, i64 %t0) nounwind
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true) nounwind
+ ret void
+}
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
+
+declare i32 @llvm.atomic.load.and.i32.p0i32(i32* nocapture, i32) nounwind
+
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
diff --git a/test/CodeGen/X86/loop-strength-reduce4.ll b/test/CodeGen/X86/loop-strength-reduce4.ll
index 6c0eb8c0df93..6556fdeea834 100644
--- a/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
-; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=PIC
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
diff --git a/test/CodeGen/X86/lsr-interesting-step.ll b/test/CodeGen/X86/lsr-interesting-step.ll
new file mode 100644
index 000000000000..4b7050bd507b
--- /dev/null
+++ b/test/CodeGen/X86/lsr-interesting-step.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86-64 -relocation-model=static -mtriple=x86_64-unknown-linux-gnu
+
+; The inner loop should require only one add (and no leas either).
+; rdar://8100380
+
+; CHECK: BB0_4:
+; CHECK-NEXT: movb $0, flags(%rdx)
+; CHECK-NEXT: addq %rcx, %rdx
+; CHECK-NEXT: cmpq $8192, %rdx
+; CHECK-NEXT: jl
+
+@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1]
+
+define void @foo() nounwind {
+entry:
+ %tmp = icmp slt i64 2, 8192 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb, label %bb21
+
+bb: ; preds = %entry
+ br label %bb7
+
+bb7: ; preds = %bb, %bb17
+ %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ] ; <i64> [#uses=2]
+ %tmp9 = icmp slt i64 2, 8192 ; <i1> [#uses=1]
+ br i1 %tmp9, label %bb10, label %bb17
+
+bb10: ; preds = %bb7
+ br label %bb11
+
+bb11: ; preds = %bb10, %bb11
+ %tmp12 = phi i64 [ %tmp14, %bb11 ], [ 2, %bb10 ] ; <i64> [#uses=2]
+ %tmp13 = getelementptr inbounds [8192 x i8]* @flags, i64 0, i64 %tmp12 ; <i8*> [#uses=1]
+ store i8 0, i8* %tmp13, align 1
+ %tmp14 = add nsw i64 %tmp12, %tmp8 ; <i64> [#uses=2]
+ %tmp15 = icmp slt i64 %tmp14, 8192 ; <i1> [#uses=1]
+ br i1 %tmp15, label %bb11, label %bb16
+
+bb16: ; preds = %bb11
+ br label %bb17
+
+bb17: ; preds = %bb16, %bb7
+ %tmp18 = add nsw i64 %tmp8, 1 ; <i64> [#uses=2]
+ %tmp19 = icmp slt i64 %tmp18, 8192 ; <i1> [#uses=1]
+ br i1 %tmp19, label %bb7, label %bb20
+
+bb20: ; preds = %bb17
+ br label %bb21
+
+bb21: ; preds = %bb20, %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/lsr-normalization.ll b/test/CodeGen/X86/lsr-normalization.ll
new file mode 100644
index 000000000000..932141d0448e
--- /dev/null
+++ b/test/CodeGen/X86/lsr-normalization.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -march=x86-64 | grep div | count 1
+; rdar://8168938
+
+; This testcase involves SCEV normalization with the exit value from
+; one loop involved with the increment value for an addrec on another
+; loop. The expression should be properly normalized and simplified,
+; and require only a single division.
+
+%0 = type { %0*, %0* }
+
+@0 = private constant [13 x i8] c"Result: %lu\0A\00" ; <[13 x i8]*> [#uses=1]
+@1 = internal constant [5 x i8] c"Huh?\00" ; <[5 x i8]*> [#uses=1]
+
+define i32 @main(i32 %arg, i8** nocapture %arg1) nounwind {
+bb:
+ %tmp = alloca %0, align 8 ; <%0*> [#uses=11]
+ %tmp2 = bitcast %0* %tmp to i8* ; <i8*> [#uses=1]
+ call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 16, i32 8, i1 false) nounwind
+ %tmp3 = getelementptr inbounds %0* %tmp, i64 0, i32 0 ; <%0**> [#uses=3]
+ store %0* %tmp, %0** %tmp3
+ %tmp4 = getelementptr inbounds %0* %tmp, i64 0, i32 1 ; <%0**> [#uses=1]
+ store %0* %tmp, %0** %tmp4
+ %tmp5 = call noalias i8* @_Znwm(i64 24) nounwind ; <i8*> [#uses=2]
+ %tmp6 = getelementptr inbounds i8* %tmp5, i64 16 ; <i8*> [#uses=2]
+ %tmp7 = icmp eq i8* %tmp6, null ; <i1> [#uses=1]
+ br i1 %tmp7, label %bb10, label %bb8
+
+bb8: ; preds = %bb
+ %tmp9 = bitcast i8* %tmp6 to i32* ; <i32*> [#uses=1]
+ store i32 1, i32* %tmp9
+ br label %bb10
+
+bb10: ; preds = %bb8, %bb
+ %tmp11 = bitcast i8* %tmp5 to %0* ; <%0*> [#uses=1]
+ call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind
+ %tmp12 = load %0** %tmp3 ; <%0*> [#uses=3]
+ %tmp13 = icmp eq %0* %tmp12, %tmp ; <i1> [#uses=1]
+ br i1 %tmp13, label %bb14, label %bb16
+
+bb14: ; preds = %bb10
+ %tmp15 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @1, i64 0, i64 0))
+ br label %bb35
+
+bb16: ; preds = %bb16, %bb10
+ %tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1]
+ %tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1]
+ %tmp19 = getelementptr inbounds %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp20 = load %0** %tmp19 ; <%0*> [#uses=2]
+ %tmp21 = icmp eq %0* %tmp20, %tmp ; <i1> [#uses=1]
+ %tmp22 = add i64 %tmp17, 1 ; <i64> [#uses=2]
+ br i1 %tmp21, label %bb23, label %bb16
+
+bb23: ; preds = %bb16
+ %tmp24 = udiv i64 100, %tmp22 ; <i64> [#uses=1]
+ br label %bb25
+
+bb25: ; preds = %bb25, %bb23
+ %tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1]
+ %tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1]
+ %tmp28 = getelementptr inbounds %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp29 = load %0** %tmp28 ; <%0*> [#uses=2]
+ %tmp30 = icmp eq %0* %tmp29, %tmp ; <i1> [#uses=1]
+ %tmp31 = add i64 %tmp26, 1 ; <i64> [#uses=2]
+ br i1 %tmp30, label %bb32, label %bb25
+
+bb32: ; preds = %bb25
+ %tmp33 = mul i64 %tmp31, %tmp24 ; <i64> [#uses=1]
+ %tmp34 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @0, i64 0, i64 0), i64 %tmp33) nounwind
+ br label %bb35
+
+bb35: ; preds = %bb32, %bb14
+ %tmp36 = load %0** %tmp3 ; <%0*> [#uses=2]
+ %tmp37 = icmp eq %0* %tmp36, %tmp ; <i1> [#uses=1]
+ br i1 %tmp37, label %bb44, label %bb38
+
+bb38: ; preds = %bb38, %bb35
+ %tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2]
+ %tmp40 = getelementptr inbounds %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1]
+ %tmp41 = load %0** %tmp40 ; <%0*> [#uses=2]
+ %tmp42 = bitcast %0* %tmp39 to i8* ; <i8*> [#uses=1]
+ call void @_ZdlPv(i8* %tmp42) nounwind
+ %tmp43 = icmp eq %0* %tmp41, %tmp ; <i1> [#uses=1]
+ br i1 %tmp43, label %bb44, label %bb38
+
+bb44: ; preds = %bb38, %bb35
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @_ZNSt15_List_node_base4hookEPS_(%0*, %0*)
+
+declare noalias i8* @_Znwm(i64)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index b7e69b84bf84..d2ff58be1055 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -464,7 +464,7 @@ bb5: ; preds = %bb3, %entry
; And the one at %bb68, where we want to be sure to use superhero mode:
-; CHECK: BB10_10:
+; CHECK: BB10_9:
; CHECK-NEXT: movaps 48(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: mulps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: movaps 32(%r{{[^,]*}}), %xmm{{.*}}
@@ -484,7 +484,7 @@ bb5: ; preds = %bb3, %entry
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $-16, %r{{.*}}
-; CHECK-NEXT: BB10_11:
+; CHECK-NEXT: BB10_10:
; CHECK-NEXT: cmpq $15, %r{{.*}}
; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll
new file mode 100644
index 000000000000..c9ed3e553a46
--- /dev/null
+++ b/test/CodeGen/X86/lsr-static-addr.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s
+
+; CHECK: xorl %eax, %eax
+; CHECK: movsd .LCPI0_0(%rip), %xmm0
+; CHECK: align
+; CHECK-NEXT: BB0_2:
+; CHECK-NEXT: movsd A(,%rax,8)
+; CHECK-NEXT: mulsd
+; CHECK-NEXT: movsd
+; CHECK-NEXT: incq %rax
+
+@A = external global [0 x double]
+
+define void @foo(i64 %n) nounwind {
+entry:
+ %cmp5 = icmp sgt i64 %n, 0
+ br i1 %cmp5, label %for.body, label %for.end
+
+for.body:
+ %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr [0 x double]* @A, i64 0, i64 %i.06
+ %tmp3 = load double* %arrayidx, align 8
+ %mul = fmul double %tmp3, 2.300000e+00
+ store double %mul, double* %arrayidx, align 8
+ %inc = add nsw i64 %i.06, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/CodeGen/X86/lsr-wrap.ll b/test/CodeGen/X86/lsr-wrap.ll
index ec8db501ef34..d605e4f14fe4 100644
--- a/test/CodeGen/X86/lsr-wrap.ll
+++ b/test/CodeGen/X86/lsr-wrap.ll
@@ -3,7 +3,7 @@
; LSR would like to use a single IV for both of these, however it's
; not safe due to wraparound.
-; CHECK: addb $-4, %r
+; CHECK: addb $-4, %
; CHECK: decw %
@g_19 = common global i32 0 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/narrow_op-2.ll b/test/CodeGen/X86/narrow_op-2.ll
deleted file mode 100644
index 796ef7a29e49..000000000000
--- a/test/CodeGen/X86/narrow_op-2.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
-
- %struct.bf = type { i64, i16, i16, i32 }
-@bfi = external global %struct.bf*
-
-define void @t1() nounwind ssp {
-entry:
-
-; CHECK: andb $-2, 10(
-; CHECK: andb $-3, 10(
-
- %0 = load %struct.bf** @bfi, align 8
- %1 = getelementptr %struct.bf* %0, i64 0, i32 1
- %2 = bitcast i16* %1 to i32*
- %3 = load i32* %2, align 1
- %4 = and i32 %3, -65537
- store i32 %4, i32* %2, align 1
- %5 = load %struct.bf** @bfi, align 8
- %6 = getelementptr %struct.bf* %5, i64 0, i32 1
- %7 = bitcast i16* %6 to i32*
- %8 = load i32* %7, align 1
- %9 = and i32 %8, -131073
- store i32 %9, i32* %7, align 1
- ret void
-}
diff --git a/test/CodeGen/X86/phi-immediate-factoring.ll b/test/CodeGen/X86/phi-immediate-factoring.ll
index 9f9f92115c79..8bed62488070 100644
--- a/test/CodeGen/X86/phi-immediate-factoring.ll
+++ b/test/CodeGen/X86/phi-immediate-factoring.ll
@@ -4,7 +4,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
-define i32 @foo(i32 %A, i32 %B, i32 %C) {
+define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind {
entry:
switch i32 %A, label %out [
i32 1, label %bb
diff --git a/test/CodeGen/X86/pr7882.ll b/test/CodeGen/X86/pr7882.ll
new file mode 100644
index 000000000000..88404dbe125e
--- /dev/null
+++ b/test/CodeGen/X86/pr7882.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin -pre-RA-sched=fast \
+; RUN: | FileCheck %s
+; make sure scheduler honors the flags clobber. PR 7882.
+
+define i32 @main(i32 %argc, i8** %argv) nounwind
+{
+entry:
+; CHECK: InlineAsm End
+; CHECK: cmpl
+ %res = icmp slt i32 1, %argc
+ %tmp = call i32 asm sideeffect alignstack
+ "push $$0
+ popf
+ mov $$13, $0", "=r,r,~{memory},~{flags}" (i1 %res)
+ %ret = select i1 %res, i32 %tmp, i32 42
+ ret i32 %ret
+}
diff --git a/test/CodeGen/X86/shl-anyext.ll b/test/CodeGen/X86/shl-anyext.ll
new file mode 100644
index 000000000000..10d489b9a8a6
--- /dev/null
+++ b/test/CodeGen/X86/shl-anyext.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s
+
+; Codegen should be able to use a 32-bit shift instead of a 64-bit shift.
+; CHECK: shll $16
+
+define fastcc void @test(i32 %level, i64 %a, i64 %b, i64 %c, i64 %d, i32* %p) nounwind {
+if.end523: ; preds = %if.end453
+ %conv7981749 = zext i32 %level to i64 ; <i64> [#uses=1]
+ %and799 = shl i64 %conv7981749, 16 ; <i64> [#uses=1]
+ %shl800 = and i64 %and799, 16711680 ; <i64> [#uses=1]
+ %or801 = or i64 %shl800, %a ; <i64> [#uses=1]
+ %or806 = or i64 %or801, %b ; <i64> [#uses=1]
+ %or811 = or i64 %or806, %c ; <i64> [#uses=1]
+ %or819 = or i64 %or811, %d ; <i64> [#uses=1]
+ %conv820 = trunc i64 %or819 to i32 ; <i32> [#uses=1]
+ store i32 %conv820, i32* %p
+ ret void
+}
+
+; CHECK: foo:
+
+declare void @bar(i64)
+
+define fastcc void @foo(i32 %t) {
+bb:
+ %tmp = add i32 %t, -1 ; <i32> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb
+ %tmp2 = zext i32 %tmp to i64 ; <i64> [#uses=2]
+ %tmp3 = add i64 %tmp2, 1 ; <i64> [#uses=1]
+ %tmp4 = xor i64 %tmp2, 536870911 ; <i64> [#uses=1]
+ %tmp5 = and i64 %tmp3, %tmp4 ; <i64> [#uses=1]
+ %tmp6 = shl i64 %tmp5, 3 ; <i64> [#uses=1]
+ %tmp7 = sub i64 64, %tmp6 ; <i64> [#uses=1]
+ %tmp8 = and i64 %tmp7, 4294967288 ; <i64> [#uses=1]
+ %tmp9 = lshr i64 -1, %tmp8 ; <i64> [#uses=1]
+ call void @bar(i64 %tmp9)
+ ret void
+}
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 4b27f2edb759..a3c9957be34e 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=32
; RUN: llc < %s -march=x86-64 -mattr=+sse2 -asm-verbose=false | FileCheck %s -check-prefix=64
+; Darwin 8 generates stubs, which don't match
+; XFAIL: apple-darwin8
define void @t1(i32 %x) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index ebcdc655eeda..348121ac8bcf 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=UNSAFE %s
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
; Some of these patterns can be matched as SSE min or max. Some of
; then can be matched provided that the operands are swapped.
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
new file mode 100644
index 000000000000..73f88aec643f
--- /dev/null
+++ b/test/CodeGen/X86/sse1.ll
@@ -0,0 +1,45 @@
+; Tests for SSE1 and below, without SSE2+.
+; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 | FileCheck %s
+
+define <8 x i16> @test1(<8 x i32> %a) nounwind {
+; CHECK: test1
+ ret <8 x i16> zeroinitializer
+}
+
+define <8 x i16> @test2(<8 x i32> %a) nounwind {
+; CHECK: test2
+ %c = trunc <8 x i32> %a to <8 x i16> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %c
+}
+
+; PR7993
+;define <4 x i32> @test3(<4 x i16> %a) nounwind {
+; %c = sext <4 x i16> %a to <4 x i32> ; <<4 x i32>> [#uses=1]
+; ret <4 x i32> %c
+;}
+
+; This should not emit shuffles to populate the top 2 elements of the 4-element
+; vector that this ends up returning.
+; rdar://8368414
+define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fsub float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; CHECK: test4:
+; CHECK-NOT: shufps $16
+; CHECK: shufps $1,
+; CHECK-NOT: shufps $16
+; CHECK: shufps $1,
+; CHECK-NOT: shufps $16
+; CHECK: unpcklps
+; CHECK-NOT: shufps $16
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 20b8eac9c8d8..6fc019071f8b 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -1,14 +1,14 @@
; Tests for SSE2 and below, without SSE3+.
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
-define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp3 = load <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
-; CHECK: t1:
+; CHECK: test1:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
; CHECK-NEXT: movlpd 12(%esp), %xmm0
@@ -17,14 +17,14 @@ define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: ret
}
-define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
+define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp3 = load <2 x double>* %A, align 16
%tmp7 = insertelement <2 x double> undef, double %B, i32 0
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
-; CHECK: t2:
+; CHECK: test2:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
; CHECK-NEXT: movhpd 12(%esp), %xmm0
@@ -32,3 +32,163 @@ define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}
+
+
+define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
+ %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
+ %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2]
+ %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
+ %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
+ %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
+ %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
+ %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp13, <4 x float>* %res
+ ret void
+; CHECK: @test3
+; CHECK: unpcklps
+}
+
+define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
+ %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp5, <4 x float>* %res
+ ret void
+; CHECK: @test4
+; CHECK: pshufd $50, %xmm0, %xmm0
+}
+
+define <4 x i32> @test5(i8** %ptr) nounwind {
+; CHECK: test5:
+; CHECK: pxor
+; CHECK: punpcklbw
+; CHECK: punpcklwd
+
+ %tmp = load i8** %ptr ; <i8*> [#uses=1]
+ %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
+ %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]
+ %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
+ %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
+ %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
+ %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
+ %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp36
+}
+
+define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
+ %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1]
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp2, <4 x float>* %res
+ ret void
+
+; CHECK: test6:
+; CHECK: movaps (%eax), %xmm0
+; CHECK: movaps %xmm0, (%eax)
+}
+
+define void @test7() nounwind {
+ bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
+ shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
+ store <4 x float> %2, <4 x float>* null
+ ret void
+
+; CHECK: test7:
+; CHECK: pxor %xmm0, %xmm0
+; CHECK: movaps %xmm0, 0
+}
+
+@x = external global [4 x i32]
+
+define <2 x i64> @test8() nounwind {
+ %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
+ %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
+ %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
+ %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
+ %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
+ %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
+ %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
+ %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
+ %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp16
+; CHECK: test8:
+; CHECK: movups (%eax), %xmm0
+}
+
+define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
+ %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp13
+; CHECK: test9:
+; CHECK: movups 8(%esp), %xmm0
+}
+
+define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
+ %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
+ %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
+ %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
+ %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp13
+; CHECK: test10:
+; CHECK: movaps 4(%esp), %xmm0
+}
+
+define <2 x double> @test11(double %a, double %b) nounwind {
+ %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
+ %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
+ ret <2 x double> %tmp7
+; CHECK: test11:
+; CHECK: movapd 4(%esp), %xmm0
+}
+
+define void @test12() nounwind {
+ %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
+ %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp4, <4 x float>* null
+ ret void
+; CHECK: test12:
+; CHECK: movhlps
+; CHECK: shufps
+}
+
+define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
+ %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
+ %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1]
+ %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp11, <4 x float>* %res
+ ret void
+; CHECK: test13
+; CHECK: shufps $69, (%eax), %xmm0
+; CHECK: pshufd $-40, %xmm0, %xmm0
+}
+
+define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
+ %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
+ %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
+ %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
+ %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp27
+; CHECK: test14:
+; CHECK: addps %xmm1, %xmm0
+; CHECK: subps %xmm1, %xmm2
+; CHECK: movlhps %xmm2, %xmm0
+}
+
+define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
+entry:
+ %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1]
+ %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
+ %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
+ ret <4 x float> %tmp4
+; CHECK: test15:
+; CHECK: movhlps %xmm1, %xmm0
+}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index ef66d1a44a18..3a14fa26300c 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -224,3 +224,28 @@ declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+; This used to compile to insertps $0 + insertps $16. insertps $0 is always
+; pointless.
+define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind {
+entry:
+ %tmp7 = extractelement <2 x float> %A, i32 0
+ %tmp5 = extractelement <2 x float> %A, i32 1
+ %tmp3 = extractelement <2 x float> %B, i32 0
+ %tmp1 = extractelement <2 x float> %B, i32 1
+ %add.r = fadd float %tmp7, %tmp3
+ %add.i = fadd float %tmp5, %tmp1
+ %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
+ %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
+ ret <2 x float> %tmp9
+; X32: buildvector:
+; X32-NOT: insertps $0
+; X32: insertps $16
+; X32-NOT: insertps $0
+; X32: ret
+; X64: buildvector:
+; X64-NOT: insertps $0
+; X64: insertps $16
+; X64-NOT: insertps $0
+; X64: ret
+}
+
diff --git a/test/CodeGen/X86/stack-color-with-reg.ll b/test/CodeGen/X86/stack-color-with-reg.ll
deleted file mode 100644
index 001a54096408..000000000000
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ /dev/null
@@ -1,361 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN: grep asm-printer %t | grep 166
-; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
-
- type { [62 x %struct.Bitvec*] } ; type %0
- type { i8* } ; type %1
- type { double } ; type %2
- %struct..5sPragmaType = type { i8*, i32 }
- %struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
- %struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
- %struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
- %struct.AuxData = type { i8*, void (i8*)* }
- %struct.Bitvec = type { i32, i32, i32, %0 }
- %struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
- %struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
- %struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
- %struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
- %struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
- %struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
- %struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
- %struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
- %struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
- %struct.Context = type { i64, i32, %struct.Fifo }
- %struct.CountCtx = type { i64 }
- %struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
- %struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
- %struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
- %struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
- %struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
- %struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
- %struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
- %struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
- %struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
- %struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
- %struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
- %struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
- %struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
- %struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
- %struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
- %struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
- %struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
- %struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
- %struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
- %struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
- %struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
- %struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
- %struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
- %struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
- %struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
- %struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
- %struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
- %struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
- %struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
- %struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
- %struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
- %struct._OvflCell = type { i8*, i16 }
- %struct._ht = type { i32, %struct.HashElem* }
- %struct.sColMap = type { i32, i8* }
- %struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %2, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
- %struct.sqlite3InitInfo = type { i32, i32, i8 }
- %struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
- %struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
- %struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
- %struct.sqlite3_index_constraint_usage = type { i32, i8 }
- %struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
- %struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
- %struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
- %struct.sqlite3_mutex = type opaque
- %struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
- %struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
- %struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.MemPage*, i32, i32)* @dropCell to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp {
-entry:
- %0 = getelementptr %struct.MemPage* %pPage, i64 0, i32 18 ; <i8**> [#uses=1]
- %1 = load i8** %0, align 8 ; <i8*> [#uses=34]
- %2 = getelementptr %struct.MemPage* %pPage, i64 0, i32 12 ; <i16*> [#uses=1]
- %3 = load i16* %2, align 2 ; <i16> [#uses=1]
- %4 = zext i16 %3 to i32 ; <i32> [#uses=2]
- %5 = shl i32 %idx, 1 ; <i32> [#uses=2]
- %6 = add i32 %4, %5 ; <i32> [#uses=1]
- %7 = sext i32 %6 to i64 ; <i64> [#uses=2]
- %8 = getelementptr i8* %1, i64 %7 ; <i8*> [#uses=1]
- %9 = load i8* %8, align 1 ; <i8> [#uses=2]
- %10 = zext i8 %9 to i32 ; <i32> [#uses=1]
- %11 = shl i32 %10, 8 ; <i32> [#uses=1]
- %.sum3 = add i64 %7, 1 ; <i64> [#uses=1]
- %12 = getelementptr i8* %1, i64 %.sum3 ; <i8*> [#uses=1]
- %13 = load i8* %12, align 1 ; <i8> [#uses=2]
- %14 = zext i8 %13 to i32 ; <i32> [#uses=1]
- %15 = or i32 %11, %14 ; <i32> [#uses=3]
- %16 = icmp slt i32 %sz, 4 ; <i1> [#uses=1]
- %size_addr.0.i = select i1 %16, i32 4, i32 %sz ; <i32> [#uses=3]
- %17 = getelementptr %struct.MemPage* %pPage, i64 0, i32 8 ; <i8*> [#uses=5]
- %18 = load i8* %17, align 8 ; <i8> [#uses=1]
- %19 = zext i8 %18 to i32 ; <i32> [#uses=4]
- %20 = add i32 %19, 1 ; <i32> [#uses=2]
- br label %bb3.i
-
-bb3.i: ; preds = %bb3.i, %entry
- %addr.0.i = phi i32 [ %20, %entry ], [ %29, %bb3.i ] ; <i32> [#uses=1]
- %21 = sext i32 %addr.0.i to i64 ; <i64> [#uses=2]
- %22 = getelementptr i8* %1, i64 %21 ; <i8*> [#uses=2]
- %23 = load i8* %22, align 1 ; <i8> [#uses=2]
- %24 = zext i8 %23 to i32 ; <i32> [#uses=1]
- %25 = shl i32 %24, 8 ; <i32> [#uses=1]
- %.sum34.i = add i64 %21, 1 ; <i64> [#uses=1]
- %26 = getelementptr i8* %1, i64 %.sum34.i ; <i8*> [#uses=2]
- %27 = load i8* %26, align 1 ; <i8> [#uses=2]
- %28 = zext i8 %27 to i32 ; <i32> [#uses=1]
- %29 = or i32 %25, %28 ; <i32> [#uses=3]
- %.not.i = icmp uge i32 %29, %15 ; <i1> [#uses=1]
- %30 = icmp eq i32 %29, 0 ; <i1> [#uses=1]
- %or.cond.i = or i1 %30, %.not.i ; <i1> [#uses=1]
- br i1 %or.cond.i, label %bb5.i, label %bb3.i
-
-bb5.i: ; preds = %bb3.i
- store i8 %9, i8* %22, align 1
- store i8 %13, i8* %26, align 1
- %31 = zext i32 %15 to i64 ; <i64> [#uses=2]
- %32 = getelementptr i8* %1, i64 %31 ; <i8*> [#uses=1]
- store i8 %23, i8* %32, align 1
- %.sum32.i = add i64 %31, 1 ; <i64> [#uses=1]
- %33 = getelementptr i8* %1, i64 %.sum32.i ; <i8*> [#uses=1]
- store i8 %27, i8* %33, align 1
- %34 = add i32 %15, 2 ; <i32> [#uses=1]
- %35 = zext i32 %34 to i64 ; <i64> [#uses=2]
- %36 = getelementptr i8* %1, i64 %35 ; <i8*> [#uses=1]
- %37 = lshr i32 %size_addr.0.i, 8 ; <i32> [#uses=1]
- %38 = trunc i32 %37 to i8 ; <i8> [#uses=1]
- store i8 %38, i8* %36, align 1
- %39 = trunc i32 %size_addr.0.i to i8 ; <i8> [#uses=1]
- %.sum31.i = add i64 %35, 1 ; <i64> [#uses=1]
- %40 = getelementptr i8* %1, i64 %.sum31.i ; <i8*> [#uses=1]
- store i8 %39, i8* %40, align 1
- %41 = getelementptr %struct.MemPage* %pPage, i64 0, i32 14 ; <i16*> [#uses=4]
- %42 = load i16* %41, align 2 ; <i16> [#uses=1]
- %43 = trunc i32 %size_addr.0.i to i16 ; <i16> [#uses=1]
- %44 = add i16 %42, %43 ; <i16> [#uses=1]
- store i16 %44, i16* %41, align 2
- %45 = load i8* %17, align 8 ; <i8> [#uses=1]
- %46 = zext i8 %45 to i32 ; <i32> [#uses=1]
- %47 = add i32 %46, 1 ; <i32> [#uses=1]
- br label %bb11.outer.i
-
-bb11.outer.i: ; preds = %bb6.i, %bb5.i
- %addr.1.ph.i = phi i32 [ %47, %bb5.i ], [ %111, %bb6.i ] ; <i32> [#uses=1]
- %48 = sext i32 %addr.1.ph.i to i64 ; <i64> [#uses=2]
- %49 = getelementptr i8* %1, i64 %48 ; <i8*> [#uses=1]
- %.sum30.i = add i64 %48, 1 ; <i64> [#uses=1]
- %50 = getelementptr i8* %1, i64 %.sum30.i ; <i8*> [#uses=1]
- br label %bb11.i
-
-bb6.i: ; preds = %bb11.i
- %51 = zext i32 %111 to i64 ; <i64> [#uses=2]
- %52 = getelementptr i8* %1, i64 %51 ; <i8*> [#uses=2]
- %53 = load i8* %52, align 1 ; <i8> [#uses=1]
- %54 = zext i8 %53 to i32 ; <i32> [#uses=1]
- %55 = shl i32 %54, 8 ; <i32> [#uses=1]
- %.sum24.i = add i64 %51, 1 ; <i64> [#uses=1]
- %56 = getelementptr i8* %1, i64 %.sum24.i ; <i8*> [#uses=2]
- %57 = load i8* %56, align 1 ; <i8> [#uses=3]
- %58 = zext i8 %57 to i32 ; <i32> [#uses=1]
- %59 = or i32 %55, %58 ; <i32> [#uses=5]
- %60 = add i32 %111, 2 ; <i32> [#uses=1]
- %61 = zext i32 %60 to i64 ; <i64> [#uses=2]
- %62 = getelementptr i8* %1, i64 %61 ; <i8*> [#uses=2]
- %63 = load i8* %62, align 1 ; <i8> [#uses=1]
- %64 = zext i8 %63 to i32 ; <i32> [#uses=1]
- %65 = shl i32 %64, 8 ; <i32> [#uses=1]
- %.sum23.i = add i64 %61, 1 ; <i64> [#uses=1]
- %66 = getelementptr i8* %1, i64 %.sum23.i ; <i8*> [#uses=2]
- %67 = load i8* %66, align 1 ; <i8> [#uses=2]
- %68 = zext i8 %67 to i32 ; <i32> [#uses=1]
- %69 = or i32 %65, %68 ; <i32> [#uses=1]
- %70 = add i32 %111, 3 ; <i32> [#uses=1]
- %71 = add i32 %70, %69 ; <i32> [#uses=1]
- %72 = icmp sge i32 %71, %59 ; <i1> [#uses=1]
- %73 = icmp ne i32 %59, 0 ; <i1> [#uses=1]
- %74 = and i1 %72, %73 ; <i1> [#uses=1]
- br i1 %74, label %bb9.i, label %bb11.outer.i
-
-bb9.i: ; preds = %bb6.i
- %75 = load i8* %17, align 8 ; <i8> [#uses=1]
- %76 = zext i8 %75 to i32 ; <i32> [#uses=1]
- %77 = add i32 %76, 7 ; <i32> [#uses=1]
- %78 = zext i32 %77 to i64 ; <i64> [#uses=1]
- %79 = getelementptr i8* %1, i64 %78 ; <i8*> [#uses=2]
- %80 = load i8* %79, align 1 ; <i8> [#uses=1]
- %81 = sub i8 %109, %57 ; <i8> [#uses=1]
- %82 = add i8 %81, %67 ; <i8> [#uses=1]
- %83 = add i8 %82, %80 ; <i8> [#uses=1]
- store i8 %83, i8* %79, align 1
- %84 = zext i32 %59 to i64 ; <i64> [#uses=2]
- %85 = getelementptr i8* %1, i64 %84 ; <i8*> [#uses=1]
- %86 = load i8* %85, align 1 ; <i8> [#uses=1]
- store i8 %86, i8* %52, align 1
- %.sum22.i = add i64 %84, 1 ; <i64> [#uses=1]
- %87 = getelementptr i8* %1, i64 %.sum22.i ; <i8*> [#uses=1]
- %88 = load i8* %87, align 1 ; <i8> [#uses=1]
- store i8 %88, i8* %56, align 1
- %89 = add i32 %59, 2 ; <i32> [#uses=1]
- %90 = zext i32 %89 to i64 ; <i64> [#uses=2]
- %91 = getelementptr i8* %1, i64 %90 ; <i8*> [#uses=1]
- %92 = load i8* %91, align 1 ; <i8> [#uses=1]
- %93 = zext i8 %92 to i32 ; <i32> [#uses=1]
- %94 = shl i32 %93, 8 ; <i32> [#uses=1]
- %.sum20.i = add i64 %90, 1 ; <i64> [#uses=1]
- %95 = getelementptr i8* %1, i64 %.sum20.i ; <i8*> [#uses=2]
- %96 = load i8* %95, align 1 ; <i8> [#uses=1]
- %97 = zext i8 %96 to i32 ; <i32> [#uses=1]
- %98 = or i32 %94, %97 ; <i32> [#uses=1]
- %99 = sub i32 %59, %111 ; <i32> [#uses=1]
- %100 = add i32 %99, %98 ; <i32> [#uses=1]
- %101 = lshr i32 %100, 8 ; <i32> [#uses=1]
- %102 = trunc i32 %101 to i8 ; <i8> [#uses=1]
- store i8 %102, i8* %62, align 1
- %103 = load i8* %95, align 1 ; <i8> [#uses=1]
- %104 = sub i8 %57, %109 ; <i8> [#uses=1]
- %105 = add i8 %104, %103 ; <i8> [#uses=1]
- store i8 %105, i8* %66, align 1
- br label %bb11.i
-
-bb11.i: ; preds = %bb9.i, %bb11.outer.i
- %106 = load i8* %49, align 1 ; <i8> [#uses=1]
- %107 = zext i8 %106 to i32 ; <i32> [#uses=1]
- %108 = shl i32 %107, 8 ; <i32> [#uses=1]
- %109 = load i8* %50, align 1 ; <i8> [#uses=3]
- %110 = zext i8 %109 to i32 ; <i32> [#uses=1]
- %111 = or i32 %108, %110 ; <i32> [#uses=6]
- %112 = icmp eq i32 %111, 0 ; <i1> [#uses=1]
- br i1 %112, label %bb12.i, label %bb6.i
-
-bb12.i: ; preds = %bb11.i
- %113 = zext i32 %20 to i64 ; <i64> [#uses=2]
- %114 = getelementptr i8* %1, i64 %113 ; <i8*> [#uses=2]
- %115 = load i8* %114, align 1 ; <i8> [#uses=2]
- %116 = add i32 %19, 5 ; <i32> [#uses=1]
- %117 = zext i32 %116 to i64 ; <i64> [#uses=2]
- %118 = getelementptr i8* %1, i64 %117 ; <i8*> [#uses=3]
- %119 = load i8* %118, align 1 ; <i8> [#uses=1]
- %120 = icmp eq i8 %115, %119 ; <i1> [#uses=1]
- br i1 %120, label %bb13.i, label %bb1.preheader
-
-bb13.i: ; preds = %bb12.i
- %121 = add i32 %19, 2 ; <i32> [#uses=1]
- %122 = zext i32 %121 to i64 ; <i64> [#uses=1]
- %123 = getelementptr i8* %1, i64 %122 ; <i8*> [#uses=1]
- %124 = load i8* %123, align 1 ; <i8> [#uses=1]
- %125 = add i32 %19, 6 ; <i32> [#uses=1]
- %126 = zext i32 %125 to i64 ; <i64> [#uses=1]
- %127 = getelementptr i8* %1, i64 %126 ; <i8*> [#uses=1]
- %128 = load i8* %127, align 1 ; <i8> [#uses=1]
- %129 = icmp eq i8 %124, %128 ; <i1> [#uses=1]
- br i1 %129, label %bb14.i, label %bb1.preheader
-
-bb14.i: ; preds = %bb13.i
- %130 = zext i8 %115 to i32 ; <i32> [#uses=1]
- %131 = shl i32 %130, 8 ; <i32> [#uses=1]
- %.sum29.i = add i64 %113, 1 ; <i64> [#uses=1]
- %132 = getelementptr i8* %1, i64 %.sum29.i ; <i8*> [#uses=1]
- %133 = load i8* %132, align 1 ; <i8> [#uses=1]
- %134 = zext i8 %133 to i32 ; <i32> [#uses=1]
- %135 = or i32 %134, %131 ; <i32> [#uses=2]
- %136 = zext i32 %135 to i64 ; <i64> [#uses=1]
- %137 = getelementptr i8* %1, i64 %136 ; <i8*> [#uses=1]
- %138 = bitcast i8* %137 to i16* ; <i16*> [#uses=1]
- %139 = bitcast i8* %114 to i16* ; <i16*> [#uses=1]
- %tmp.i = load i16* %138, align 1 ; <i16> [#uses=1]
- store i16 %tmp.i, i16* %139, align 1
- %140 = load i8* %118, align 1 ; <i8> [#uses=1]
- %141 = zext i8 %140 to i32 ; <i32> [#uses=1]
- %142 = shl i32 %141, 8 ; <i32> [#uses=1]
- %.sum28.i = add i64 %117, 1 ; <i64> [#uses=1]
- %143 = getelementptr i8* %1, i64 %.sum28.i ; <i8*> [#uses=2]
- %144 = load i8* %143, align 1 ; <i8> [#uses=2]
- %145 = zext i8 %144 to i32 ; <i32> [#uses=1]
- %146 = or i32 %142, %145 ; <i32> [#uses=1]
- %147 = add i32 %135, 2 ; <i32> [#uses=1]
- %148 = zext i32 %147 to i64 ; <i64> [#uses=2]
- %149 = getelementptr i8* %1, i64 %148 ; <i8*> [#uses=1]
- %150 = load i8* %149, align 1 ; <i8> [#uses=1]
- %151 = zext i8 %150 to i32 ; <i32> [#uses=1]
- %152 = shl i32 %151, 8 ; <i32> [#uses=1]
- %.sum27.i = add i64 %148, 1 ; <i64> [#uses=1]
- %153 = getelementptr i8* %1, i64 %.sum27.i ; <i8*> [#uses=2]
- %154 = load i8* %153, align 1 ; <i8> [#uses=1]
- %155 = zext i8 %154 to i32 ; <i32> [#uses=1]
- %156 = or i32 %152, %155 ; <i32> [#uses=1]
- %157 = add i32 %156, %146 ; <i32> [#uses=1]
- %158 = lshr i32 %157, 8 ; <i32> [#uses=1]
- %159 = trunc i32 %158 to i8 ; <i8> [#uses=1]
- store i8 %159, i8* %118, align 1
- %160 = load i8* %153, align 1 ; <i8> [#uses=1]
- %161 = add i8 %160, %144 ; <i8> [#uses=1]
- store i8 %161, i8* %143, align 1
- br label %bb1.preheader
-
-bb1.preheader: ; preds = %bb14.i, %bb13.i, %bb12.i
- %i.08 = add i32 %idx, 1 ; <i32> [#uses=2]
- %162 = getelementptr %struct.MemPage* %pPage, i64 0, i32 15 ; <i16*> [#uses=4]
- %163 = load i16* %162, align 4 ; <i16> [#uses=2]
- %164 = zext i16 %163 to i32 ; <i32> [#uses=1]
- %165 = icmp sgt i32 %164, %i.08 ; <i1> [#uses=1]
- br i1 %165, label %bb, label %bb2
-
-bb: ; preds = %bb, %bb1.preheader
- %indvar = phi i64 [ 0, %bb1.preheader ], [ %indvar.next, %bb ] ; <i64> [#uses=3]
- %tmp16 = add i32 %5, %4 ; <i32> [#uses=1]
- %tmp.17 = sext i32 %tmp16 to i64 ; <i64> [#uses=1]
- %tmp19 = shl i64 %indvar, 1 ; <i64> [#uses=1]
- %ctg2.sum = add i64 %tmp.17, %tmp19 ; <i64> [#uses=4]
- %ctg229 = getelementptr i8* %1, i64 %ctg2.sum ; <i8*> [#uses=1]
- %ctg229.sum31 = add i64 %ctg2.sum, 2 ; <i64> [#uses=1]
- %166 = getelementptr i8* %1, i64 %ctg229.sum31 ; <i8*> [#uses=1]
- %167 = load i8* %166, align 1 ; <i8> [#uses=1]
- store i8 %167, i8* %ctg229
- %ctg229.sum30 = add i64 %ctg2.sum, 3 ; <i64> [#uses=1]
- %168 = getelementptr i8* %1, i64 %ctg229.sum30 ; <i8*> [#uses=1]
- %169 = load i8* %168, align 1 ; <i8> [#uses=1]
- %ctg229.sum = add i64 %ctg2.sum, 1 ; <i64> [#uses=1]
- %170 = getelementptr i8* %1, i64 %ctg229.sum ; <i8*> [#uses=1]
- store i8 %169, i8* %170, align 1
- %indvar15 = trunc i64 %indvar to i32 ; <i32> [#uses=1]
- %i.09 = add i32 %indvar15, %i.08 ; <i32> [#uses=1]
- %i.0 = add i32 %i.09, 1 ; <i32> [#uses=1]
- %171 = load i16* %162, align 4 ; <i16> [#uses=2]
- %172 = zext i16 %171 to i32 ; <i32> [#uses=1]
- %173 = icmp sgt i32 %172, %i.0 ; <i1> [#uses=1]
- %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
- br i1 %173, label %bb, label %bb2
-
-bb2: ; preds = %bb, %bb1.preheader
- %174 = phi i16 [ %163, %bb1.preheader ], [ %171, %bb ] ; <i16> [#uses=1]
- %175 = add i16 %174, -1 ; <i16> [#uses=2]
- store i16 %175, i16* %162, align 4
- %176 = load i8* %17, align 8 ; <i8> [#uses=1]
- %177 = zext i8 %176 to i32 ; <i32> [#uses=1]
- %178 = add i32 %177, 3 ; <i32> [#uses=1]
- %179 = zext i32 %178 to i64 ; <i64> [#uses=1]
- %180 = getelementptr i8* %1, i64 %179 ; <i8*> [#uses=1]
- %181 = lshr i16 %175, 8 ; <i16> [#uses=1]
- %182 = trunc i16 %181 to i8 ; <i8> [#uses=1]
- store i8 %182, i8* %180, align 1
- %183 = load i8* %17, align 8 ; <i8> [#uses=1]
- %184 = zext i8 %183 to i32 ; <i32> [#uses=1]
- %185 = add i32 %184, 3 ; <i32> [#uses=1]
- %186 = zext i32 %185 to i64 ; <i64> [#uses=1]
- %187 = load i16* %162, align 4 ; <i16> [#uses=1]
- %188 = trunc i16 %187 to i8 ; <i8> [#uses=1]
- %.sum = add i64 %186, 1 ; <i64> [#uses=1]
- %189 = getelementptr i8* %1, i64 %.sum ; <i8*> [#uses=1]
- store i8 %188, i8* %189, align 1
- %190 = load i16* %41, align 2 ; <i16> [#uses=1]
- %191 = add i16 %190, 2 ; <i16> [#uses=1]
- store i16 %191, i16* %41, align 2
- %192 = getelementptr %struct.MemPage* %pPage, i64 0, i32 1 ; <i8*> [#uses=1]
- store i8 1, i8* %192, align 1
- ret void
-}
diff --git a/test/CodeGen/X86/stdcall.ll b/test/CodeGen/X86/stdcall.ll
index 70204bcf4745..a7c2517e7dbe 100644
--- a/test/CodeGen/X86/stdcall.ll
+++ b/test/CodeGen/X86/stdcall.ll
@@ -2,7 +2,7 @@
; PR5851
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
%0 = type { void (...)* }
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 5682e7caf8bd..abc5174c98de 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -1,6 +1,6 @@
; rdar://7860110
-; RUN: llc < %s | FileCheck %s -check-prefix=X64
-; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=X32
+; RUN: llc -asm-verbose=false < %s | FileCheck %s -check-prefix=X64
+; RUN: llc -march=x86 -asm-verbose=false < %s | FileCheck %s -check-prefix=X32
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.2"
@@ -125,3 +125,30 @@ entry:
; X32: movb %cl, 5(%{{.*}})
}
+; PR7833
+
+@g_16 = internal global i32 -1
+
+; X64: test8:
+; X64-NEXT: movl _g_16(%rip), %eax
+; X64-NEXT: movl $0, _g_16(%rip)
+; X64-NEXT: orl $1, %eax
+; X64-NEXT: movl %eax, _g_16(%rip)
+; X64-NEXT: ret
+define void @test8() nounwind {
+ %tmp = load i32* @g_16
+ store i32 0, i32* @g_16
+ %or = or i32 %tmp, 1
+ store i32 %or, i32* @g_16
+ ret void
+}
+
+; X64: test9:
+; X64-NEXT: orb $1, _g_16(%rip)
+; X64-NEXT: ret
+define void @test9() nounwind {
+ %tmp = load i32* @g_16
+ %or = or i32 %tmp, 1
+ store i32 %or, i32* @g_16
+ ret void
+}
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll
index d54fb4115b07..7f92af4dca9f 100644
--- a/test/CodeGen/X86/tailcall-fastisel.ll
+++ b/test/CodeGen/X86/tailcall-fastisel.ll
@@ -1,8 +1,6 @@
-; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | grep TAILCALL
+; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL
-; Fast-isel shouldn't attempt to handle this tail call, and it should
-; cleanly terminate instruction selection in the block after it's
-; done to avoid emitting invalid MachineInstrs.
+; Fast-isel shouldn't attempt to cope with tail calls.
%0 = type { i64, i32, i8* }
@@ -11,3 +9,11 @@ fail: ; preds = %entry
%tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1]
ret i8* %tmp20
}
+
+define i32 @foo() nounwind {
+entry:
+ %0 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=1]
+ ret i32 %0
+}
+
+declare i32 @bar(...) nounwind
diff --git a/test/CodeGen/X86/twoaddr-coalesce.ll b/test/CodeGen/X86/twoaddr-coalesce.ll
index 4c37225ce027..6f6d6f2cd967 100644
--- a/test/CodeGen/X86/twoaddr-coalesce.ll
+++ b/test/CodeGen/X86/twoaddr-coalesce.ll
@@ -3,7 +3,7 @@
@"\01LC" = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
-define i32 @main() nounwind {
+define i32 @foo() nounwind {
bb1.thread:
br label %bb1
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index 9c4b773a6190..76c3fdfc060c 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -10,15 +10,16 @@ define void @test1(<2 x float> %Q, float *%P2) nounwind {
store float %c, float* %P2
ret void
; X64: test1:
-; X64-NEXT: addss %xmm1, %xmm0
-; X64-NEXT: movss %xmm0, (%rdi)
+; X64-NEXT: pshufd $1, %xmm0, %xmm1
+; X64-NEXT: addss %xmm0, %xmm1
+; X64-NEXT: movss %xmm1, (%rdi)
; X64-NEXT: ret
; X32: test1:
-; X32-NEXT: movss 4(%esp), %xmm0
-; X32-NEXT: addss 8(%esp), %xmm0
-; X32-NEXT: movl 12(%esp), %eax
-; X32-NEXT: movss %xmm0, (%eax)
+; X32-NEXT: pshufd $1, %xmm0, %xmm1
+; X32-NEXT: addss %xmm0, %xmm1
+; X32-NEXT: movl 4(%esp), %eax
+; X32-NEXT: movss %xmm1, (%eax)
; X32-NEXT: ret
}
@@ -28,12 +29,42 @@ define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounw
ret <2 x float> %Z
; X64: test2:
-; X64-NEXT: insertps $0
-; X64-NEXT: insertps $16
-; X64-NEXT: insertps $0
-; X64-NEXT: insertps $16
-; X64-NEXT: addps
-; X64-NEXT: movaps
-; X64-NEXT: pshufd
+; X64-NEXT: addps %xmm1, %xmm0
; X64-NEXT: ret
}
+
+
+define <2 x float> @test3(<4 x float> %A) nounwind {
+ %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %C = fadd <2 x float> %B, %B
+ ret <2 x float> %C
+; CHECK: test3:
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: ret
+}
+
+define <2 x float> @test4(<2 x float> %A) nounwind {
+ %C = fadd <2 x float> %A, %A
+ ret <2 x float> %C
+; CHECK: test4:
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: ret
+}
+
+define <4 x float> @test5(<4 x float> %A) nounwind {
+ %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %C = fadd <2 x float> %B, %B
+ br label %BB
+
+BB:
+ %D = fadd <2 x float> %C, %C
+ %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x float> %E
+
+; CHECK: _test5:
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: addps %xmm0, %xmm0
+; CHECK-NEXT: ret
+}
+
+
diff --git a/test/CodeGen/X86/vec_cast.ll b/test/CodeGen/X86/vec_cast.ll
index 6f18d13cc9d3..f8531646effa 100644
--- a/test/CodeGen/X86/vec_cast.ll
+++ b/test/CodeGen/X86/vec_cast.ll
@@ -1,15 +1,16 @@
-; RUN: llc < %s -march=x86-64
-; RUN: llc < %s -march=x86-64 -disable-mmx
+; RUN: llc < %s -march=x86-64 -mcpu=core2
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -disable-mmx
+
define <8 x i32> @a(<8 x i16> %a) nounwind {
%c = sext <8 x i16> %a to <8 x i32>
ret <8 x i32> %c
}
-define <3 x i32> @b(<3 x i16> %a) nounwind {
- %c = sext <3 x i16> %a to <3 x i32>
- ret <3 x i32> %c
-}
+;define <3 x i32> @b(<3 x i16> %a) nounwind {
+; %c = sext <3 x i16> %a to <3 x i32>
+; ret <3 x i32> %c
+;}
define <1 x i32> @c(<1 x i16> %a) nounwind {
%c = sext <1 x i16> %a to <1 x i32>
@@ -21,10 +22,10 @@ define <8 x i32> @d(<8 x i16> %a) nounwind {
ret <8 x i32> %c
}
-define <3 x i32> @e(<3 x i16> %a) nounwind {
- %c = zext <3 x i16> %a to <3 x i32>
- ret <3 x i32> %c
-}
+;define <3 x i32> @e(<3 x i16> %a) nounwind {
+; %c = zext <3 x i16> %a to <3 x i32>
+; ret <3 x i32> %c
+;}
define <1 x i32> @f(<1 x i16> %a) nounwind {
%c = zext <1 x i16> %a to <1 x i32>
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
index 54aa43f0c35d..de3b36ff126c 100644
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ b/test/CodeGen/X86/vec_insert-6.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep pslldq
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 6
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
define <4 x float> @t3(<4 x float>* %P) nounwind {
%tmp1 = load <4 x float>* %P
diff --git a/test/CodeGen/X86/vec_insert-9.ll b/test/CodeGen/X86/vec_insert-9.ll
index 2e829df1f8df..e5a7ccc5ef94 100644
--- a/test/CodeGen/X86/vec_insert-9.ll
+++ b/test/CodeGen/X86/vec_insert-9.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse41 > %t
-; RUN: grep pinsrd %t | count 2
+; RUN: grep pinsrd %t | count 1
define <4 x i32> @var_insert2(<4 x i32> %x, i32 %val, i32 %idx) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shift4.ll b/test/CodeGen/X86/vec_shift4.ll
new file mode 100644
index 000000000000..9ef7fbdb0c50
--- /dev/null
+++ b/test/CodeGen/X86/vec_shift4.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41 | FileCheck %s
+
+define <2 x i64> @shl1(<4 x i32> %r, <4 x i32> %a) nounwind readnone ssp {
+entry:
+; CHECK-NOT: shll
+; CHECK: pslld
+; CHECK: paddd
+; CHECK: cvttps2dq
+; CHECK: pmulld
+
+ %shl = shl <4 x i32> %r, %a ; <<4 x i32>> [#uses=1]
+ %tmp2 = bitcast <4 x i32> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp2
+}
+
+define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp {
+entry:
+; CHECK-NOT: shlb
+; CHECK: pblendvb
+; CHECK: pblendvb
+; CHECK: pblendvb
+ %shl = shl <16 x i8> %r, %a ; <<16 x i8>> [#uses=1]
+ %tmp2 = bitcast <16 x i8> %shl to <2 x i64> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp2
+}
diff --git a/test/CodeGen/X86/vec_shuffle-10.ll b/test/CodeGen/X86/vec_shuffle-10.ll
deleted file mode 100644
index a63e3868ad75..000000000000
--- a/test/CodeGen/X86/vec_shuffle-10.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep unpcklps %t | count 1
-; RUN: grep pshufd %t | count 1
-; RUN: not grep {sub.*esp} %t
-
-define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) {
- %tmp = load <4 x float>* %B ; <<4 x float>> [#uses=2]
- %tmp3 = load <4 x float>* %A ; <<4 x float>> [#uses=2]
- %tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0 ; <float> [#uses=1]
- %tmp7 = extractelement <4 x float> %tmp, i32 0 ; <float> [#uses=1]
- %tmp8 = extractelement <4 x float> %tmp3, i32 1 ; <float> [#uses=1]
- %tmp9 = extractelement <4 x float> %tmp, i32 1 ; <float> [#uses=1]
- %tmp10 = insertelement <4 x float> undef, float %tmp.upgrd.1, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp10, float %tmp7, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %tmp8, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp13, <4 x float>* %res
- ret void
-}
-
-define void @test2(<4 x float> %X, <4 x float>* %res) {
- %tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp5, <4 x float>* %res
- ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
index 9fc09dfdd2b8..861a1cc5b93c 100644
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ b/test/CodeGen/X86/vec_shuffle-19.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
; PR2485
define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind {
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index 6d1bac0743d4..fc06b9514e43 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 7562f1d89594..1b104deb3055 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep punpck
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
define i32 @t() nounwind optsize {
entry:
+; CHECK: punpckldq
%a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
%b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
diff --git a/test/CodeGen/X86/vec_shuffle-3.ll b/test/CodeGen/X86/vec_shuffle-3.ll
deleted file mode 100644
index f4930b084504..000000000000
--- a/test/CodeGen/X86/vec_shuffle-3.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movlhps %t | count 1
-; RUN: grep movhlps %t | count 1
-
-define <4 x float> @test1(<4 x float>* %x, <4 x float>* %y) {
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=2]
- %tmp5 = load <4 x float>* %x ; <<4 x float>> [#uses=2]
- %tmp9 = fadd <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
- %tmp21 = fsub <4 x float> %tmp5, %tmp ; <<4 x float>> [#uses=1]
- %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp27
-}
-
-define <4 x float> @movhl(<4 x float>* %x, <4 x float>* %y) {
-entry:
- %tmp = load <4 x float>* %y ; <<4 x float>> [#uses=1]
- %tmp3 = load <4 x float>* %x ; <<4 x float>> [#uses=1]
- %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp4
-}
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
new file mode 100644
index 000000000000..1ed858de64e8
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
+entry:
+; CHECK: movaps (%rdi), %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: movlps (%rax), %xmm1
+; CHECK-NEXT: shufps $36, %xmm1, %xmm0
+ %0 = load <4 x i32>* undef, align 16
+ %1 = load <4 x i32>* %a0, align 16
+ %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+ ret <4 x i32> %2
+}
+
diff --git a/test/CodeGen/X86/vec_shuffle-4.ll b/test/CodeGen/X86/vec_shuffle-4.ll
deleted file mode 100644
index 829fedf97cc5..000000000000
--- a/test/CodeGen/X86/vec_shuffle-4.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep shuf %t | count 2
-; RUN: not grep unpck %t
-
-define void @test(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) {
- %tmp3 = load <4 x float>* %B ; <<4 x float>> [#uses=1]
- %tmp5 = load <4 x float>* %C ; <<4 x float>> [#uses=1]
- %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp11, <4 x float>* %res
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-5.ll b/test/CodeGen/X86/vec_shuffle-5.ll
deleted file mode 100644
index c24167a6150d..000000000000
--- a/test/CodeGen/X86/vec_shuffle-5.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movhlps %t | count 1
-; RUN: grep shufps %t | count 1
-
-define void @test() nounwind {
- %tmp1 = load <4 x float>* null ; <<4 x float>> [#uses=2]
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- %tmp4 = fadd <4 x float> %tmp2, %tmp3 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp4, <4 x float>* null
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-6.ll b/test/CodeGen/X86/vec_shuffle-6.ll
deleted file mode 100644
index 28fd59b29dd3..000000000000
--- a/test/CodeGen/X86/vec_shuffle-6.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movapd %t | count 1
-; RUN: grep movaps %t | count 1
-; RUN: grep movups %t | count 2
-
-target triple = "i686-apple-darwin"
-@x = external global [4 x i32]
-
-define <2 x i64> @test1() {
- %tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0) ; <i32> [#uses=1]
- %tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1) ; <i32> [#uses=1]
- %tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2) ; <i32> [#uses=1]
- %tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3) ; <i32> [#uses=1]
- %tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0 ; <<4 x i32>> [#uses=1]
- %tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1 ; <<4 x i32>> [#uses=1]
- %tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2 ; <<4 x i32>> [#uses=1]
- %tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3 ; <<4 x i32>> [#uses=1]
- %tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64> ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %tmp16
-}
-
-define <4 x float> @test2(i32 %dummy, float %a, float %b, float %c, float %d) {
- %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp13
-}
-
-define <4 x float> @test3(float %a, float %b, float %c, float %d) {
- %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1]
- %tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2 ; <<4 x float>> [#uses=1]
- %tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp13
-}
-
-define <2 x double> @test4(double %a, double %b) {
- %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1]
- %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
- ret <2 x double> %tmp7
-}
diff --git a/test/CodeGen/X86/vec_shuffle-7.ll b/test/CodeGen/X86/vec_shuffle-7.ll
deleted file mode 100644
index 64bd6a3c83b8..000000000000
--- a/test/CodeGen/X86/vec_shuffle-7.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep pxor %t | count 1
-; RUN: not grep shufps %t
-
-define void @test() {
- bitcast <4 x i32> zeroinitializer to <4 x float> ; <<4 x float>>:1 [#uses=1]
- shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
- store <4 x float> %2, <4 x float>* null
- unreachable
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-8.ll b/test/CodeGen/X86/vec_shuffle-8.ll
deleted file mode 100644
index 964ce7b2892b..000000000000
--- a/test/CodeGen/X86/vec_shuffle-8.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | \
-; RUN: not grep shufps
-
-define void @test(<4 x float>* %res, <4 x float>* %A) {
- %tmp1 = load <4 x float>* %A ; <<4 x float>> [#uses=1]
- %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp2, <4 x float>* %res
- ret void
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-9.ll b/test/CodeGen/X86/vec_shuffle-9.ll
deleted file mode 100644
index 07195869b8cf..000000000000
--- a/test/CodeGen/X86/vec_shuffle-9.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
-
-define <4 x i32> @test(i8** %ptr) {
-; CHECK: pxor
-; CHECK: punpcklbw
-; CHECK: punpcklwd
-
- %tmp = load i8** %ptr ; <i8*> [#uses=1]
- %tmp.upgrd.1 = bitcast i8* %tmp to float* ; <float*> [#uses=1]
- %tmp.upgrd.2 = load float* %tmp.upgrd.1 ; <float> [#uses=1]
- %tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0 ; <<4 x float>> [#uses=1]
- %tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
- %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
- %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
- %tmp21 = bitcast <4 x float> %tmp11 to <16 x i8> ; <<16 x i8>> [#uses=1]
- %tmp22 = shufflevector <16 x i8> %tmp21, <16 x i8> zeroinitializer, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
- %tmp31 = bitcast <16 x i8> %tmp22 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %tmp.upgrd.4 = shufflevector <8 x i16> zeroinitializer, <8 x i16> %tmp31, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
- %tmp36 = bitcast <8 x i16> %tmp.upgrd.4 to <4 x i32> ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %tmp36
-}
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 25dde57c767e..463f522a11df 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -3,7 +3,8 @@
; widening shuffle v3float and then a add
define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
-; CHECK: insertps
+; CHECK: shuf:
+; CHECK: extractps
; CHECK: extractps
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
%val = fadd <3 x float> %x, %src2
@@ -15,7 +16,8 @@ entry:
; widening shuffle v3float with a different mask and then a add
define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
entry:
-; CHECK: insertps
+; CHECK: shuf2:
+; CHECK: extractps
; CHECK: extractps
%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
%val = fadd <3 x float> %x, %src2
@@ -26,7 +28,7 @@ entry:
; Example of when widening a v3float operation causes the DAG to replace a node
; with the operation that we are currently widening, i.e. when replacing
; opA with opB, the DAG will produce new operations with opA.
-define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) {
+define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
entry:
; CHECK: pshufd
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
diff --git a/test/CodeGen/X86/win_chkstk.ll b/test/CodeGen/X86/win_chkstk.ll
new file mode 100644
index 000000000000..27d3358d4ac1
--- /dev/null
+++ b/test/CodeGen/X86/win_chkstk.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X64
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; Windows and mingw require a prologue helper routine if more than 4096 bytes area
+; allocated on the stack. Windows uses __chkstk and mingw uses __alloca. __alloca
+; and the 32-bit version of __chkstk will probe the stack and adjust the stack pointer.
+; The 64-bit version of __chkstk is only responsible for probing the stack. The 64-bit
+; prologue is responsible for adjusting the stack pointer.
+
+; Stack allocation >= 4096 bytes will require call to __chkstk in the Windows ABI.
+define i32 @main4k() nounwind {
+entry:
+; WIN_X32: call __chkstk
+; WIN_X64: call __chkstk
+; MINGW_X32: call __alloca
+; MINGW_X64: call _alloca
+; LINUX-NOT: call __chkstk
+ %array4096 = alloca [4096 x i8], align 16 ; <[4096 x i8]*> [#uses=0]
+ ret i32 0
+}
+
+; Make sure we don't call __chkstk or __alloca when we have less than a 4096 stack
+; allocation.
+define i32 @main128() nounwind {
+entry:
+; WIN_X32: # BB#0:
+; WIN_X32-NOT: call __chkstk
+; WIN_X32: ret
+
+; WIN_X64: # BB#0:
+; WIN_X64-NOT: call __chkstk
+; WIN_X64: ret
+
+; MINGW_X64: # BB#0:
+; MINGW_X64-NOT: call _alloca
+; MINGW_X64: ret
+
+; LINUX: # BB#0:
+; LINUX-NOT: call __chkstk
+; LINUX: ret
+ %array128 = alloca [128 x i8], align 16 ; <[128 x i8]*> [#uses=0]
+ ret i32 0
+}
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 3e3bb95d06f7..447007439fbb 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
-; RUN: llc < %s -march=x86-64 -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12
; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
declare void @bar(double %x)
diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll
new file mode 100644
index 000000000000..87a4a8955a3e
--- /dev/null
+++ b/test/DebugInfo/2010-07-19-Crash.ll
@@ -0,0 +1,24 @@
+; RUN: llc -o /dev/null < %s
+; PR7662
+; Do not add variables to !11 because it is a declaration entry.
+
+define i32 @bar() nounwind readnone ssp {
+entry:
+ ret i32 42, !dbg !9
+}
+
+!llvm.dbg.sp = !{!0, !6, !11}
+!llvm.dbg.lv.foo = !{!7}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"one.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"one.c", metadata !".", metadata !"clang 2.8", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ]
+!11 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null} ; [ DW_TAG_subprogram ]
+!7 = metadata !{i32 524544, metadata !8, metadata !"one", metadata !1, i32 8, metadata !5} ; [ DW_TAG_auto_variable ]
+!8 = metadata !{i32 524299, metadata !6, i32 7, i32 18} ; [ DW_TAG_lexical_block ]
+!9 = metadata !{i32 4, i32 3, metadata !10, null}
+!10 = metadata !{i32 524299, metadata !0, i32 3, i32 11} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/2010-08-04-StackVariable.ll b/test/DebugInfo/2010-08-04-StackVariable.ll
new file mode 100644
index 000000000000..61cd20bb1ab3
--- /dev/null
+++ b/test/DebugInfo/2010-08-04-StackVariable.ll
@@ -0,0 +1,124 @@
+; RUN: llc -O0 < %s | grep DW_OP_fbreg
+; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot.
+
+%struct.SVal = type { i8*, i32 }
+
+define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24
+ call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24
+ %0 = icmp ne i32 %i, 0, !dbg !27 ; <i1> [#uses=1]
+ br i1 %0, label %bb, label %bb1, !dbg !27
+
+bb: ; preds = %entry
+ %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1]
+ %2 = load i32* %1, align 8, !dbg !29 ; <i32> [#uses=1]
+ %3 = add i32 %2, %i, !dbg !29 ; <i32> [#uses=1]
+ br label %bb2, !dbg !29
+
+bb1: ; preds = %entry
+ %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1]
+ %5 = load i32* %4, align 8, !dbg !30 ; <i32> [#uses=1]
+ %6 = sub i32 %5, 1, !dbg !30 ; <i32> [#uses=1]
+ br label %bb2, !dbg !30
+
+bb2: ; preds = %bb1, %bb
+ %.0 = phi i32 [ %3, %bb ], [ %6, %bb1 ] ; <i32> [#uses=1]
+ br label %return, !dbg !29
+
+return: ; preds = %bb2
+ ret i32 %.0, !dbg !29
+}
+
+define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
+entry:
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34
+ %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1]
+ store i8* null, i8** %0, align 8, !dbg !34
+ %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1]
+ store i32 0, i32* %1, align 8, !dbg !34
+ br label %return, !dbg !34
+
+return: ; preds = %entry
+ ret void, !dbg !35
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @main() nounwind ssp {
+entry:
+ %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3]
+ %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41
+ call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41
+ %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1]
+ store i32 1, i32* %1, align 8, !dbg !42
+ %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
+ %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
+ %4 = load i8** %3, align 8, !dbg !43 ; <i8*> [#uses=1]
+ store i8* %4, i8** %2, align 8, !dbg !43
+ %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
+ %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 8, !dbg !43 ; <i32> [#uses=1]
+ store i32 %7, i32* %5, align 8, !dbg !43
+ %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0]
+ call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43
+ br label %return, !dbg !45
+
+return: ; preds = %entry
+ ret i32 0, !dbg !45
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.sp = !{!0, !9, !16, !17, !20}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ]
+!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
+!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
+!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
+!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!11 = metadata !{null, metadata !12, metadata !13}
+!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
+!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!15 = metadata !{null, metadata !12}
+!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ]
+!17 = metadata !{i32 524334, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ]
+!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!19 = metadata !{metadata !13, metadata !13, metadata !1}
+!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
+!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!22 = metadata !{metadata !13}
+!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 16, i32 0, metadata !17, null}
+!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ]
+!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
+!27 = metadata !{i32 17, i32 0, metadata !28, null}
+!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ]
+!29 = metadata !{i32 18, i32 0, metadata !28, null}
+!30 = metadata !{i32 20, i32 0, metadata !28, null}
+!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ]
+!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
+!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 11, i32 0, metadata !16, null}
+!35 = metadata !{i32 11, i32 0, metadata !36, null}
+!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ]
+!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ]
+!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ]
+!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ]
+!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ]
+!41 = metadata !{i32 24, i32 0, metadata !39, null}
+!42 = metadata !{i32 25, i32 0, metadata !39, null}
+!43 = metadata !{i32 26, i32 0, metadata !39, null}
+!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ]
+!45 = metadata !{i32 27, i32 0, metadata !39, null}
diff --git a/test/DebugInfo/printdbginfo2.ll b/test/DebugInfo/printdbginfo2.ll
index e19395b0df1f..319379197477 100644
--- a/test/DebugInfo/printdbginfo2.ll
+++ b/test/DebugInfo/printdbginfo2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -print-dbginfo -disable-output | FileCheck %s
+; RUN: opt < %s -print-dbginfo -disable-output |& FileCheck %s
; grep {%b is variable b of type x declared at x.c:7} %t1
; grep {%2 is variable b of type x declared at x.c:7} %t1
; grep {@c.1442 is variable c of type int declared at x.c:4} %t1
diff --git a/test/Feature/NamedMDNode.ll b/test/Feature/NamedMDNode.ll
index 02a79f81cdf1..0c6bcd9abfe5 100644
--- a/test/Feature/NamedMDNode.ll
+++ b/test/Feature/NamedMDNode.ll
@@ -3,7 +3,7 @@
;; Simple NamedMDNode
!0 = metadata !{i32 42}
!1 = metadata !{metadata !"foo"}
-!llvm.stuff = !{!0, !1, null}
+!llvm.stuff = !{!0, !1}
!samename = !{!0, !1}
declare void @samename()
diff --git a/test/Feature/linker_private_linkages.ll b/test/Feature/linker_private_linkages.ll
index 19bcbb40aa01..f9f290875645 100644
--- a/test/Feature/linker_private_linkages.ll
+++ b/test/Feature/linker_private_linkages.ll
@@ -4,3 +4,4 @@
@foo = linker_private hidden global i32 0
@bar = linker_private_weak hidden global i32 0
+@qux = linker_private_weak_def_auto global i32 0
diff --git a/test/Feature/metadata.ll b/test/Feature/metadata.ll
index d43815be46aa..9856b375495c 100644
--- a/test/Feature/metadata.ll
+++ b/test/Feature/metadata.ll
@@ -1,9 +1,11 @@
; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
; PR7105
-define void @foo() {
+define void @foo(i32 %x) {
call void @llvm.zonk(metadata !1, i64 0, metadata !1)
- ret void
+ store i32 0, i32* null, !whatever !0, !whatever_else !{}, !more !{metadata !"hello"}
+ store i32 0, i32* null, !whatever !{i32 %x, metadata !"hello", metadata !1, metadata !{}, metadata !2}
+ ret void, !whatever !{i32 %x}
}
declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone
diff --git a/test/Feature/unions.ll b/test/Feature/unions.ll
deleted file mode 100644
index 3cf8c3ce0e97..000000000000
--- a/test/Feature/unions.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-%union.anon = type union { i8, i32, float }
-
-@union1 = constant union { i32, i8 } { i32 4 }
-@union2 = constant union { i32, i8 } insertvalue(union { i32, i8 } undef, i32 4, 0)
-@union3 = common global %union.anon zeroinitializer, align 8
-
-define void @"Unions" () {
- ret void
-}
-
diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp
index a8eda77e4a72..e1cc81f40f79 100644
--- a/test/FrontendC++/2009-07-15-LineNumbers.cpp
+++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp
@@ -1,7 +1,7 @@
// This is a regression test on debug info to make sure that we can
// print line numbers in asm.
// RUN: %llvmgcc -S -O0 -g %s -o - | \
-// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep { 2009-07-15-LineNumbers.cpp:25$}
+// RUN: llc --disable-fp-elim -O0 -relocation-model=pic | grep {2009-07-15-LineNumbers.cpp:25$}
#include <stdlib.h>
diff --git a/test/FrontendC++/2010-07-19-nowarn.cpp b/test/FrontendC++/2010-07-19-nowarn.cpp
new file mode 100644
index 000000000000..8742bf152329
--- /dev/null
+++ b/test/FrontendC++/2010-07-19-nowarn.cpp
@@ -0,0 +1,21 @@
+// RUN: %llvmgcc %s -c -m32 -fasm-blocks -o /dev/null
+// This should not warn about unreferenced label. 8195660.
+// XFAIL: *
+// XTARGET: x86,i386,i686
+
+void quarterAsm(int array[], int len)
+{
+ __asm
+ {
+ mov esi, array;
+ mov ecx, len;
+ shr ecx, 2;
+loop:
+ movdqa xmm0, [esi];
+ psrad xmm0, 2;
+ movdqa [esi], xmm0;
+ add esi, 16;
+ sub ecx, 1;
+ jnz loop;
+ }
+}
diff --git a/test/FrontendC++/2010-07-23-DeclLoc.cpp b/test/FrontendC++/2010-07-23-DeclLoc.cpp
new file mode 100644
index 000000000000..c72de3b33623
--- /dev/null
+++ b/test/FrontendC++/2010-07-23-DeclLoc.cpp
@@ -0,0 +1,86 @@
+// RUN: %llvmgxx -emit-llvm -S -g %s -o - | FileCheck %s
+// Require the template function declaration refer to the correct filename.
+// First, locate the function decl in metadata, and pluck out the file handle:
+// CHECK: {{extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*[^ ]+", metadata !}}[[filehandle:[0-9]+]],
+// Second: Require that filehandle refer to the correct filename:
+// CHECK: {{^!}}[[filehandle]] = metadata {{![{].*}} metadata !"decl_should_be_here.hpp",
+typedef long unsigned int __darwin_size_t;
+typedef __darwin_size_t size_t;
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+namespace std {
+ template<typename _Tp> class auto_ptr {
+ _Tp* _M_ptr;
+ public:
+ typedef _Tp element_type;
+ auto_ptr(element_type* __p = 0) throw() : _M_ptr(__p) { }
+ element_type& operator*() const throw() { }
+ };
+}
+class Pointer32 {
+public:
+ typedef uint32_t ptr_t;
+ typedef uint32_t size_t;
+};
+class Pointer64 {
+public:
+ typedef uint64_t ptr_t;
+ typedef uint64_t size_t;
+};
+class BigEndian {};
+class LittleEndian {};
+template <typename _SIZE, typename _ENDIANNESS> class SizeAndEndianness {
+public:
+ typedef _SIZE SIZE;
+};
+typedef SizeAndEndianness<Pointer32, LittleEndian> ISA32Little;
+typedef SizeAndEndianness<Pointer32, BigEndian> ISA32Big;
+typedef SizeAndEndianness<Pointer64, LittleEndian> ISA64Little;
+typedef SizeAndEndianness<Pointer64, BigEndian> ISA64Big;
+template <typename SIZE> class TRange {
+protected:
+ typename SIZE::ptr_t _location;
+ typename SIZE::size_t _length;
+ TRange(typename SIZE::ptr_t location, typename SIZE::size_t length) : _location(location), _length(length) { }
+};
+template <typename SIZE, typename T> class TRangeValue : public TRange<SIZE> {
+ T _value;
+public:
+ TRangeValue(typename SIZE::ptr_t location, typename SIZE::size_t length, T value) : TRange<SIZE>(location, length), _value(value) {};
+};
+template <typename SIZE> class TAddressRelocator {};
+class CSCppSymbolOwner{};
+class CSCppSymbolOwnerData{};
+template <typename SIZE> class TRawSymbolOwnerData
+{
+ TRangeValue< SIZE, uint8_t* > _TEXT_text_section;
+ const char* _dsym_path;
+ uint32_t _dylib_current_version;
+ uint32_t _dylib_compatibility_version;
+public:
+ TRawSymbolOwnerData() :
+ _TEXT_text_section(0, 0, __null), _dsym_path(__null), _dylib_current_version(0), _dylib_compatibility_version(0) {}
+};
+template <typename SIZE_AND_ENDIANNESS> class TExtendedMachOHeader {};
+# 16 "decl_should_be_here.hpp"
+template <typename SIZE_AND_ENDIANNESS> void extract_dwarf_data_from_header(TExtendedMachOHeader<SIZE_AND_ENDIANNESS>& header,
+ TRawSymbolOwnerData<typename SIZE_AND_ENDIANNESS::SIZE>& symbol_owner_data,
+ TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>* address_relocator) {}
+struct CSCppSymbolOwnerHashFunctor {
+ size_t operator()(const CSCppSymbolOwner& symbol_owner) const {
+# 97 "wrong_place_for_decl.cpp"
+ }
+};
+template <typename SIZE_AND_ENDIANNESS> CSCppSymbolOwnerData* create_symbol_owner_data_arch_specific(CSCppSymbolOwner* symbol_owner, const char* dsym_path) {
+ typedef typename SIZE_AND_ENDIANNESS::SIZE SIZE;
+ std::auto_ptr< TRawSymbolOwnerData<SIZE> > data(new TRawSymbolOwnerData<SIZE>());
+ std::auto_ptr< TExtendedMachOHeader<SIZE_AND_ENDIANNESS> > header;
+ extract_dwarf_data_from_header(*header, *data, (TAddressRelocator<typename SIZE_AND_ENDIANNESS::SIZE>*)__null);
+}
+CSCppSymbolOwnerData* create_symbol_owner_data2(CSCppSymbolOwner* symbol_owner, const char* dsym_path) {
+ create_symbol_owner_data_arch_specific< ISA32Little >(symbol_owner, dsym_path);
+ create_symbol_owner_data_arch_specific< ISA32Big >(symbol_owner, dsym_path);
+ create_symbol_owner_data_arch_specific< ISA64Little >(symbol_owner, dsym_path);
+ create_symbol_owner_data_arch_specific< ISA64Big >(symbol_owner, dsym_path);
+}
diff --git a/test/FrontendC++/2010-08-31-ByValArg.cpp b/test/FrontendC++/2010-08-31-ByValArg.cpp
new file mode 100644
index 000000000000..be0d354b1d98
--- /dev/null
+++ b/test/FrontendC++/2010-08-31-ByValArg.cpp
@@ -0,0 +1,53 @@
+// This regression test checks byval arguments' debug info.
+// Radar 8367011
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
+// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
+// RUN: %compile_c %t.s -o %t.o
+// RUN: %link %t.o -o %t.exe
+// RUN: echo {break get\nrun\np missing_arg.b} > %t.in
+// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \
+// RUN: grep {1 = 4242}
+
+// XTARGET: x86_64-apple-darwin
+
+class EVT {
+public:
+ int a;
+ int b;
+ int c;
+};
+
+class VAL {
+public:
+ int x;
+ int y;
+};
+void foo(EVT e);
+EVT bar();
+
+void get(int *i, unsigned dl, VAL v, VAL *p, unsigned n, EVT missing_arg) {
+//CHECK: .ascii "missing_arg"
+ EVT e = bar();
+ if (dl == n)
+ foo(missing_arg);
+}
+
+
+EVT bar() {
+ EVT e;
+ return e;
+}
+
+void foo(EVT e) {}
+
+int main(){
+ VAL v;
+ EVT ma;
+ ma.a = 1;
+ ma.b = 4242;
+ ma.c = 3;
+ int i = 42;
+ get (&i, 1, v, &v, 2, ma);
+ return 0;
+}
+
diff --git a/test/FrontendC/2008-03-24-BitField-And-Alloca.c b/test/FrontendC/2008-03-24-BitField-And-Alloca.c
index 291f036523a4..641bcf1dbeb9 100644
--- a/test/FrontendC/2008-03-24-BitField-And-Alloca.c
+++ b/test/FrontendC/2008-03-24-BitField-And-Alloca.c
@@ -1,5 +1,5 @@
// RUN: %llvmgcc -O2 -S %s -o - | not grep alloca
-// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep store
+// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep {store }
enum {
PP_C,
diff --git a/test/FrontendC/2010-05-18-asmsched.c b/test/FrontendC/2010-05-18-asmsched.c
index 34abbe3e5c5c..12e91405d10f 100644
--- a/test/FrontendC/2010-05-18-asmsched.c
+++ b/test/FrontendC/2010-05-18-asmsched.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -c -O3 -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s
+// RUN: %llvmgcc %s -c -O3 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s
// r9 used to be clobbered before its value was moved to r10. 7993104.
void foo(int x, int y) {
@@ -14,4 +14,4 @@ void foo(int x, int y) {
lr9 = x;
lr10 = foo;
asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10));
-} \ No newline at end of file
+}
diff --git a/test/FrontendC/2010-07-14-overconservative-align.c b/test/FrontendC/2010-07-14-overconservative-align.c
index 65fbdb83003b..1744ba84185d 100644
--- a/test/FrontendC/2010-07-14-overconservative-align.c
+++ b/test/FrontendC/2010-07-14-overconservative-align.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -emit-llvm -m64 -S -o - | FileCheck %s
+// RUN: %llvmgcc %s -emit-llvm -S -o - | FileCheck %s
// PR 5995
struct s {
int word;
@@ -9,6 +9,6 @@ struct s {
void func (struct s *s)
{
-// CHECK: load %struct.s** %s_addr, align 8
+// CHECK: load %struct.s** %s_addr, align {{[48]}}
s->word = 0;
}
diff --git a/test/FrontendC/2010-07-14-ref-off-end.c b/test/FrontendC/2010-07-14-ref-off-end.c
index 6ccd05b770e9..c7fdd95a7aa0 100644
--- a/test/FrontendC/2010-07-14-ref-off-end.c
+++ b/test/FrontendC/2010-07-14-ref-off-end.c
@@ -17,8 +17,8 @@ return(char)s->c;
}
main()
{
-// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2]
-// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 ; <i32*> [#uses=2]
+// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0
+// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0
struct T t;
t.i=0xff;
t.c=0xffff11;
diff --git a/test/FrontendC/2010-07-27-MinNoFoldConst.c b/test/FrontendC/2010-07-27-MinNoFoldConst.c
new file mode 100644
index 000000000000..7cd8b4c43764
--- /dev/null
+++ b/test/FrontendC/2010-07-27-MinNoFoldConst.c
@@ -0,0 +1,18 @@
+// RUN: %llvmgcc -S %s -o - | FileCheck %s
+extern int printf(const char *, ...);
+static void bad(unsigned int v1, unsigned int v2) {
+ printf("%u\n", 1631381461u * (((v2 - 1273463329u <= v1 - 1273463329u) ? v2 : v1) - 1273463329u) + 121322179u);
+}
+// Radar 8198362
+// GCC FE wants to convert the above to
+// 1631381461u * MIN(v2 - 1273463329u, v1 - 1273463329u)
+// and then to
+// MIN(1631381461u * v2 - 4047041419, 1631381461u * v1 - 4047041419)
+//
+// 1631381461u * 1273463329u = 2077504466193943669, but 32-bit overflow clips
+// this to 4047041419. This breaks the comparision implicit in the MIN().
+// Two multiply operations suggests the bad optimization is happening;
+// one multiplication, after the MIN(), is correct.
+// CHECK: mul
+// CHECK-NOT: mul
+// CHECK: ret
diff --git a/test/FrontendC/2010-08-12-asm-aggr-arg.c b/test/FrontendC/2010-08-12-asm-aggr-arg.c
new file mode 100644
index 000000000000..81ec14b28826
--- /dev/null
+++ b/test/FrontendC/2010-08-12-asm-aggr-arg.c
@@ -0,0 +1,16 @@
+// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s
+// Radar 8288710: A small aggregate can be passed as an integer. Make sure
+// we don't get an error with "input constraint with a matching output
+// constraint of incompatible type!"
+
+struct wrapper {
+ int i;
+};
+
+// CHECK: xyz
+int test(int i) {
+ struct wrapper w;
+ w.i = i;
+ __asm__("xyz" : "=r" (w) : "0" (w));
+ return w.i;
+}
diff --git a/test/FrontendC/asm-reg-var-local.c b/test/FrontendC/asm-reg-var-local.c
new file mode 100644
index 000000000000..22bd43c076d2
--- /dev/null
+++ b/test/FrontendC/asm-reg-var-local.c
@@ -0,0 +1,32 @@
+// RUN: %llvmgcc %s -S -o - | FileCheck %s
+// Exercise various use cases for local asm "register variables".
+// XFAIL: *
+// XTARGET: x86_64,i686,i386
+
+int foo() {
+// CHECK: %a = alloca i32
+
+ register int a asm("rsi")=5;
+// CHECK: store i32 5, i32* %a, align 4
+
+ asm volatile("; %0 This asm defines rsi" : "=r"(a));
+// CHECK: %asmtmp = call i32 asm sideeffect "; $0 This asm defines rsi", "={rsi}
+// CHECK: store i32 %asmtmp, i32* %a
+
+ a = 42;
+// CHECK: store i32 42, i32* %a, align 4
+
+ asm volatile("; %0 This asm uses rsi" : : "r"(a));
+// CHECK: %1 = load i32* %a, align 4
+// CHECK: call void asm sideeffect "", "{rsi}"(i32 %1) nounwind
+// CHECK: %2 = call i32 asm sideeffect "", "={rsi}"() nounwind
+// CHECK: call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 %2)
+
+ return a;
+// CHECK: %3 = load i32* %a, align 4
+// CHECK: call void asm sideeffect "", "{rsi}"(i32 %3) nounwind
+// CHECK: %4 = call i32 asm sideeffect "", "={rsi}"() nounwind
+// CHECK: store i32 %4, i32* %0, align 4
+// CHECK: %5 = load i32* %0, align 4
+// CHECK: store i32 %5, i32* %retval, align 4
+}
diff --git a/test/FrontendC/cstring-align.c b/test/FrontendC/cstring-align.c
index b9ec281f5677..764126e02184 100644
--- a/test/FrontendC/cstring-align.c
+++ b/test/FrontendC/cstring-align.c
@@ -1,6 +1,4 @@
-// RUN: %llvmgcc %s -c -Os -m32 -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s -check-prefix=DARWIN32
-// RUN: %llvmgcc %s -c -Os -m64 -emit-llvm -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=DARWIN64
-// XTARGET: darwin
+// RUN: %llvmgcc %s -c -Os -emit-llvm -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s
extern void func(const char *, const char *);
@@ -8,10 +6,6 @@ void long_function_name() {
func("%s: the function name", __func__);
}
-// DARWIN64: .align 4
-// DARWIN64: ___func__.
-// DARWIN64: .asciz "long_function_name"
-
-// DARWIN32: .align 4
-// DARWIN32: ___func__.
-// DARWIN32: .asciz "long_function_name"
+// CHECK: .align 4
+// CHECK: ___func__.
+// CHECK: .asciz "long_function_name"
diff --git a/test/FrontendC/misaligned-param.c b/test/FrontendC/misaligned-param.c
new file mode 100644
index 000000000000..b4fcfe312f5a
--- /dev/null
+++ b/test/FrontendC/misaligned-param.c
@@ -0,0 +1,15 @@
+// RUN: %llvmgcc %s -m32 -S -o - | FileCheck %s
+// Misaligned parameter must be memcpy'd to correctly aligned temporary.
+// XFAIL: *
+// XTARGET: i386-apple-darwin,i686-apple-darwin,x86_64-apple-darwin
+
+struct s { int x; long double y; };
+long double foo(struct s x, int i, struct s y) {
+// CHECK: foo
+// CHECK: %x_addr = alloca %struct.s, align 16
+// CHECK: %y_addr = alloca %struct.s, align 16
+// CHECK: memcpy
+// CHECK: memcpy
+// CHECK: bar
+ return bar(&x, &y);
+}
diff --git a/test/FrontendC/vla-1.c b/test/FrontendC/vla-1.c
index 76f6c53c1e16..77f78a5e3af7 100644
--- a/test/FrontendC/vla-1.c
+++ b/test/FrontendC/vla-1.c
@@ -1,5 +1,6 @@
-// RUN: true
-// %llvmgcc -std=gnu99 %s -S |& grep {error: "is greater than the stack alignment" }
+// RUN: %llvmgcc_only -std=gnu99 %s -S |& grep {warning: alignment for}
+// ppc does not support this feature, and gets a fatal error at runtime.
+// XFAIL: powerpc
int foo(int a)
{
diff --git a/test/FrontendC/vla-2.c b/test/FrontendC/vla-2.c
new file mode 100644
index 000000000000..555cfc789250
--- /dev/null
+++ b/test/FrontendC/vla-2.c
@@ -0,0 +1,10 @@
+// RUN: %llvmgcc -std=gnu99 %s -S -o - | grep ".*alloca.*align 16"
+
+extern void bar(int[]);
+
+void foo(int a)
+{
+ int var[a] __attribute__((__aligned__(16)));
+ bar(var);
+ return;
+}
diff --git a/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm
new file mode 100644
index 000000000000..298844e97b5d
--- /dev/null
+++ b/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm
@@ -0,0 +1,27 @@
+// RUN: not %llvmgcc %s -S -emit-llvm -o - |& FileCheck %s
+// This tests for a specific diagnostic in LLVM-GCC.
+// Clang compiles this correctly with no diagnostic,
+// ergo this test will fail with a Clang-based front-end.
+class TFENodeVector {
+public:
+ TFENodeVector(const TFENodeVector& inNodeVector);
+ TFENodeVector();
+};
+
+@interface TWindowHistoryEntry {}
+@property (assign, nonatomic) TFENodeVector targetPath;
+@end
+
+@implementation TWindowHistoryEntry
+@synthesize targetPath;
+- (void) initWithWindowController {
+ TWindowHistoryEntry* entry;
+ TFENodeVector newPath;
+ // CHECK: setting a C++ non-POD object value is not implemented
+#ifdef __clang__
+#error setting a C++ non-POD object value is not implemented
+#endif
+ entry.targetPath = newPath;
+ [entry setTargetPath:newPath];
+}
+@end
diff --git a/test/FrontendObjC++/2010-08-04-Template.mm b/test/FrontendObjC++/2010-08-04-Template.mm
new file mode 100644
index 000000000000..d0383406d7e2
--- /dev/null
+++ b/test/FrontendObjC++/2010-08-04-Template.mm
@@ -0,0 +1,10 @@
+// RUN: %llvmgcc %s -S -emit-llvm
+struct TRunSoon {
+ template <class P1> static void Post() {}
+};
+
+@implementation TPrivsTableViewMainController
+- (void) applyToEnclosed {
+ TRunSoon::Post<int>();
+}
+@end
diff --git a/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm
new file mode 100644
index 000000000000..b33d7307af49
--- /dev/null
+++ b/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm
@@ -0,0 +1,16 @@
+// RUN: %llvmgcc %s -S -emit-llvm
+struct TFENode {
+ TFENode(const TFENode& inNode);
+};
+
+@interface TIconViewController
+- (const TFENode&) target;
+@end
+
+void sortAllChildrenForNode(const TFENode&node);
+
+@implementation TIconViewController
+- (void) setArrangeBy {
+ sortAllChildrenForNode(self.target);
+}
+@end
diff --git a/test/FrontendObjC/2009-08-17-DebugInfo.m b/test/FrontendObjC/2009-08-17-DebugInfo.m
index 2c72e9532ceb..8ed7c24dc134 100644
--- a/test/FrontendObjC/2009-08-17-DebugInfo.m
+++ b/test/FrontendObjC/2009-08-17-DebugInfo.m
@@ -5,7 +5,7 @@
// RUN: %link %t.o -o %t.exe -framework Foundation
// RUN: echo {break randomFunc\n} > %t.in
// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \
-// RUN: grep {Breakpoint 1 at 0x.*: file 2009-08-17-DebugInfo.m, line 21}
+// RUN: grep {Breakpoint 1 at 0x.*: file .*2009-08-17-DebugInfo.m, line 21}
// XTARGET: darwin
@interface MyClass
{
diff --git a/test/Integer/a15.ll b/test/Integer/a15.ll
deleted file mode 100644
index 5c9dc3b1be0d..000000000000
--- a/test/Integer/a15.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t.ll
-; RUN: diff %t.ll %s.out
-
-; test 15 bits
-;
-@b = constant i15 add(i15 32767, i15 1)
-@c = constant i15 add(i15 32767, i15 32767)
-@d = constant i15 add(i15 32760, i15 8)
-@e = constant i15 sub(i15 0 , i15 1)
-@f = constant i15 sub(i15 0 , i15 32767)
-@g = constant i15 sub(i15 2 , i15 32767)
-
-@h = constant i15 shl(i15 1 , i15 15)
-@i = constant i15 shl(i15 1 , i15 14)
-@j = constant i15 lshr(i15 32767 , i15 14)
-@l = constant i15 ashr(i15 32767 , i15 14)
-
-@n = constant i15 mul(i15 32767, i15 2)
-@q = constant i15 mul(i15 -16383,i15 -3)
-@r = constant i15 sdiv(i15 -1, i15 16383)
-@s = constant i15 udiv(i15 -1, i15 16383)
-@t = constant i15 srem(i15 1, i15 32766)
-@u = constant i15 urem(i15 32767,i15 -1)
-@o = constant i15 trunc( i16 32768 to i15 )
-@p = constant i15 trunc( i16 32767 to i15 )
-@v = constant i15 srem(i15 -1, i15 768)
-
diff --git a/test/Integer/a15.ll.out b/test/Integer/a15.ll.out
deleted file mode 100644
index 5195cdf3761f..000000000000
--- a/test/Integer/a15.ll.out
+++ /dev/null
@@ -1,21 +0,0 @@
-; ModuleID = '<stdin>'
-
-@b = constant i15 0 ; <i15*> [#uses=0]
-@c = constant i15 -2 ; <i15*> [#uses=0]
-@d = constant i15 0 ; <i15*> [#uses=0]
-@e = constant i15 -1 ; <i15*> [#uses=0]
-@f = constant i15 1 ; <i15*> [#uses=0]
-@g = constant i15 3 ; <i15*> [#uses=0]
-@h = constant i15 undef ; <i15*> [#uses=0]
-@i = constant i15 -16384 ; <i15*> [#uses=0]
-@j = constant i15 1 ; <i15*> [#uses=0]
-@l = constant i15 -1 ; <i15*> [#uses=0]
-@n = constant i15 -2 ; <i15*> [#uses=0]
-@q = constant i15 16381 ; <i15*> [#uses=0]
-@r = constant i15 0 ; <i15*> [#uses=0]
-@s = constant i15 2 ; <i15*> [#uses=0]
-@t = constant i15 1 ; <i15*> [#uses=0]
-@u = constant i15 0 ; <i15*> [#uses=0]
-@o = constant i15 0 ; <i15*> [#uses=0]
-@p = constant i15 -1 ; <i15*> [#uses=0]
-@v = constant i15 -1 ; <i15*> [#uses=0]
diff --git a/test/Integer/a17.ll b/test/Integer/a17.ll
deleted file mode 100644
index db03e7c6be0d..000000000000
--- a/test/Integer/a17.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t.ll
-; RUN: diff %t.ll %s.out
-
-; test 17 bits
-;
-@b = constant i17 add(i17 131071, i17 1)
-@c = constant i17 add(i17 131071, i17 131071)
-@d = constant i17 add(i17 131064, i17 8)
-@e = constant i17 sub(i17 0 , i17 1)
-@f = constant i17 sub(i17 0 , i17 131071)
-@g = constant i17 sub(i17 2 , i17 131071)
-
-@h = constant i17 shl(i17 1 , i17 17)
-@i = constant i17 shl(i17 1 , i17 16)
-@j = constant i17 lshr(i17 131071 , i17 16)
-@l = constant i17 ashr(i17 131071 , i17 16)
-
-@n = constant i17 mul(i17 131071, i17 2)
-@q = constant i17 sdiv(i17 -1, i17 65535)
-@r = constant i17 udiv(i17 -1, i17 65535)
-@s = constant i17 srem(i17 1, i17 131070)
-@t = constant i17 urem(i17 131071,i17 -1)
-@o = constant i17 trunc( i18 131072 to i17 )
-@p = constant i17 trunc( i18 131071 to i17 )
-@v = constant i17 srem(i17 -1, i17 15)
diff --git a/test/Integer/a17.ll.out b/test/Integer/a17.ll.out
deleted file mode 100644
index ba6641289e31..000000000000
--- a/test/Integer/a17.ll.out
+++ /dev/null
@@ -1,20 +0,0 @@
-; ModuleID = '<stdin>'
-
-@b = constant i17 0 ; <i17*> [#uses=0]
-@c = constant i17 -2 ; <i17*> [#uses=0]
-@d = constant i17 0 ; <i17*> [#uses=0]
-@e = constant i17 -1 ; <i17*> [#uses=0]
-@f = constant i17 1 ; <i17*> [#uses=0]
-@g = constant i17 3 ; <i17*> [#uses=0]
-@h = constant i17 undef ; <i17*> [#uses=0]
-@i = constant i17 -65536 ; <i17*> [#uses=0]
-@j = constant i17 1 ; <i17*> [#uses=0]
-@l = constant i17 -1 ; <i17*> [#uses=0]
-@n = constant i17 -2 ; <i17*> [#uses=0]
-@q = constant i17 0 ; <i17*> [#uses=0]
-@r = constant i17 2 ; <i17*> [#uses=0]
-@s = constant i17 1 ; <i17*> [#uses=0]
-@t = constant i17 0 ; <i17*> [#uses=0]
-@o = constant i17 0 ; <i17*> [#uses=0]
-@p = constant i17 -1 ; <i17*> [#uses=0]
-@v = constant i17 -1 ; <i17*> [#uses=0]
diff --git a/test/Integer/a31.ll b/test/Integer/a31.ll
deleted file mode 100644
index c0c571f63068..000000000000
--- a/test/Integer/a31.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t.ll
-; RUN: diff %t.ll %s.out
-
-; test 31 bits
-;
-@b = constant i31 add(i31 2147483647, i31 1)
-@c = constant i31 add(i31 2147483647, i31 2147483647)
-@d = constant i31 add(i31 2147483640, i31 8)
-@e = constant i31 sub(i31 0 , i31 1)
-@f = constant i31 sub(i31 0 , i31 2147483647)
-@g = constant i31 sub(i31 2 , i31 2147483647)
-
-@h = constant i31 shl(i31 1 , i31 31)
-@i = constant i31 shl(i31 1 , i31 30)
-@j = constant i31 lshr(i31 2147483647 , i31 30)
-@l = constant i31 ashr(i31 2147483647 , i31 30)
-
-@n = constant i31 mul(i31 2147483647, i31 2)
-@q = constant i31 sdiv(i31 -1, i31 1073741823)
-@r = constant i31 udiv(i31 -1, i31 1073741823)
-@s = constant i31 srem(i31 1, i31 2147483646)
-@t = constant i31 urem(i31 2147483647,i31 -1)
-@o = constant i31 trunc( i32 2147483648 to i31 )
-@p = constant i31 trunc( i32 2147483647 to i31 )
-@u = constant i31 srem(i31 -3, i31 17)
diff --git a/test/Integer/a31.ll.out b/test/Integer/a31.ll.out
deleted file mode 100644
index 7407a746b5bf..000000000000
--- a/test/Integer/a31.ll.out
+++ /dev/null
@@ -1,20 +0,0 @@
-; ModuleID = '<stdin>'
-
-@b = constant i31 0 ; <i31*> [#uses=0]
-@c = constant i31 -2 ; <i31*> [#uses=0]
-@d = constant i31 0 ; <i31*> [#uses=0]
-@e = constant i31 -1 ; <i31*> [#uses=0]
-@f = constant i31 1 ; <i31*> [#uses=0]
-@g = constant i31 3 ; <i31*> [#uses=0]
-@h = constant i31 undef ; <i31*> [#uses=0]
-@i = constant i31 -1073741824 ; <i31*> [#uses=0]
-@j = constant i31 1 ; <i31*> [#uses=0]
-@l = constant i31 -1 ; <i31*> [#uses=0]
-@n = constant i31 -2 ; <i31*> [#uses=0]
-@q = constant i31 0 ; <i31*> [#uses=0]
-@r = constant i31 2 ; <i31*> [#uses=0]
-@s = constant i31 1 ; <i31*> [#uses=0]
-@t = constant i31 0 ; <i31*> [#uses=0]
-@o = constant i31 0 ; <i31*> [#uses=0]
-@p = constant i31 -1 ; <i31*> [#uses=0]
-@u = constant i31 -3 ; <i31*> [#uses=0]
diff --git a/test/Integer/a33.ll b/test/Integer/a33.ll
deleted file mode 100644
index f328907b4608..000000000000
--- a/test/Integer/a33.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t.ll
-; RUN: diff %t.ll %s.out
-
-; test 33 bits
-;
-@b = constant i33 add(i33 8589934591, i33 1)
-@c = constant i33 add(i33 8589934591, i33 8589934591)
-@d = constant i33 add(i33 8589934584, i33 8)
-@e = constant i33 sub(i33 0 , i33 1)
-@f = constant i33 sub(i33 0 , i33 8589934591)
-@g = constant i33 sub(i33 2 , i33 8589934591)
-
-@h = constant i33 shl(i33 1 , i33 33)
-@i = constant i33 shl(i33 1 , i33 32)
-@j = constant i33 lshr(i33 8589934591 , i33 32)
-@l = constant i33 ashr(i33 8589934591 , i33 32)
-
-@n = constant i33 mul(i33 8589934591, i33 2)
-@q = constant i33 sdiv(i33 -1, i33 4294967295)
-@r = constant i33 udiv(i33 -1, i33 4294967295)
-@s = constant i33 srem(i33 1, i33 8589934590)
-@t = constant i33 urem(i33 8589934591,i33 -1)
-@o = constant i33 trunc( i34 8589934592 to i33 )
-@p = constant i33 trunc( i34 8589934591 to i33 )
-@u = constant i33 srem(i33 -1, i33 17)
-
diff --git a/test/Integer/a33.ll.out b/test/Integer/a33.ll.out
deleted file mode 100644
index 6cd61ee69cdc..000000000000
--- a/test/Integer/a33.ll.out
+++ /dev/null
@@ -1,20 +0,0 @@
-; ModuleID = '<stdin>'
-
-@b = constant i33 0 ; <i33*> [#uses=0]
-@c = constant i33 -2 ; <i33*> [#uses=0]
-@d = constant i33 0 ; <i33*> [#uses=0]
-@e = constant i33 -1 ; <i33*> [#uses=0]
-@f = constant i33 1 ; <i33*> [#uses=0]
-@g = constant i33 3 ; <i33*> [#uses=0]
-@h = constant i33 undef ; <i33*> [#uses=0]
-@i = constant i33 -4294967296 ; <i33*> [#uses=0]
-@j = constant i33 1 ; <i33*> [#uses=0]
-@l = constant i33 -1 ; <i33*> [#uses=0]
-@n = constant i33 -2 ; <i33*> [#uses=0]
-@q = constant i33 0 ; <i33*> [#uses=0]
-@r = constant i33 2 ; <i33*> [#uses=0]
-@s = constant i33 1 ; <i33*> [#uses=0]
-@t = constant i33 0 ; <i33*> [#uses=0]
-@o = constant i33 0 ; <i33*> [#uses=0]
-@p = constant i33 -1 ; <i33*> [#uses=0]
-@u = constant i33 -1 ; <i33*> [#uses=0]
diff --git a/test/Integer/a63.ll b/test/Integer/a63.ll
deleted file mode 100644
index 052ecd585002..000000000000
--- a/test/Integer/a63.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t.ll
-; RUN: diff %t.ll %s.out
-
-; test 63 bits
-;
-@b = constant i63 add(i63 9223372036854775807, i63 1)
-@c = constant i63 add(i63 9223372036854775807, i63 9223372036854775807)
-@d = constant i63 add(i63 9223372036854775800, i63 8)
-@e = constant i63 sub(i63 0 , i63 1)
-@f = constant i63 sub(i63 0 , i63 9223372036854775807)
-@g = constant i63 sub(i63 2 , i63 9223372036854775807)
-
-@h = constant i63 shl(i63 1 , i63 63)
-@i = constant i63 shl(i63 1 , i63 62)
-@j = constant i63 lshr(i63 9223372036854775807 , i63 62)
-@l = constant i63 ashr(i63 9223372036854775807 , i63 62)
-
-@n = constant i63 mul(i63 9223372036854775807, i63 2)
-@q = constant i63 sdiv(i63 -1, i63 4611686018427387903)
-@u = constant i63 sdiv(i63 -1, i63 1)
-@r = constant i63 udiv(i63 -1, i63 4611686018427387903)
-@s = constant i63 srem(i63 3, i63 9223372036854775806)
-@t = constant i63 urem(i63 9223372036854775807,i63 -1)
-@o = constant i63 trunc( i64 9223372036854775808 to i63 )
-@p = constant i63 trunc( i64 9223372036854775807 to i63 )
diff --git a/test/Integer/a63.ll.out b/test/Integer/a63.ll.out
deleted file mode 100644
index 18dff5a2964e..000000000000
--- a/test/Integer/a63.ll.out
+++ /dev/null
@@ -1,20 +0,0 @@
-; ModuleID = '<stdin>'
-
-@b = constant i63 0 ; <i63*> [#uses=0]
-@c = constant i63 -2 ; <i63*> [#uses=0]
-@d = constant i63 0 ; <i63*> [#uses=0]
-@e = constant i63 -1 ; <i63*> [#uses=0]
-@f = constant i63 1 ; <i63*> [#uses=0]
-@g = constant i63 3 ; <i63*> [#uses=0]
-@h = constant i63 undef ; <i63*> [#uses=0]
-@i = constant i63 -4611686018427387904 ; <i63*> [#uses=0]
-@j = constant i63 1 ; <i63*> [#uses=0]
-@l = constant i63 -1 ; <i63*> [#uses=0]
-@n = constant i63 -2 ; <i63*> [#uses=0]
-@q = constant i63 0 ; <i63*> [#uses=0]
-@u = constant i63 -1 ; <i63*> [#uses=0]
-@r = constant i63 2 ; <i63*> [#uses=0]
-@s = constant i63 1 ; <i63*> [#uses=0]
-@t = constant i63 0 ; <i63*> [#uses=0]
-@o = constant i63 0 ; <i63*> [#uses=0]
-@p = constant i63 -1 ; <i63*> [#uses=0]
diff --git a/test/Integer/a7.ll b/test/Integer/a7.ll
deleted file mode 100644
index 1edb35f9104b..000000000000
--- a/test/Integer/a7.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t.ll
-; RUN: diff %t.ll %s.out
-
-; test 7 bits
-;
-@b = constant i7 add(i7 127, i7 1)
-@q = constant i7 add(i7 -64, i7 -1)
-@c = constant i7 add(i7 127, i7 127)
-@d = constant i7 add(i7 120, i7 8)
-@e = constant i7 sub(i7 0 , i7 1)
-@f = constant i7 sub(i7 0 , i7 127)
-@g = constant i7 sub(i7 2 , i7 127)
-@r = constant i7 sub(i7 -3, i7 120)
-@s = constant i7 sub(i7 -3, i7 -8)
-
-@h = constant i7 shl(i7 1 , i7 7)
-@i = constant i7 shl(i7 1 , i7 6)
-@j = constant i7 lshr(i7 127 , i7 6)
-@l = constant i7 ashr(i7 127 , i7 6)
-@m2= constant i7 ashr(i7 -1 , i7 3)
-
-@n = constant i7 mul(i7 127, i7 2)
-@t = constant i7 mul(i7 -63, i7 -2)
-@u = constant i7 mul(i7 -32, i7 2)
-@v = constant i7 sdiv(i7 -1, i7 63)
-@w = constant i7 udiv(i7 -1, i7 63)
-@x = constant i7 srem(i7 1 , i7 126)
-@y = constant i7 urem(i7 127, i7 -1)
-@o = constant i7 trunc( i8 128 to i7 )
-@p = constant i7 trunc( i8 255 to i7 )
-
diff --git a/test/Integer/a7.ll.out b/test/Integer/a7.ll.out
deleted file mode 100644
index 250925d795e6..000000000000
--- a/test/Integer/a7.ll.out
+++ /dev/null
@@ -1,25 +0,0 @@
-; ModuleID = '<stdin>'
-
-@b = constant i7 0 ; <i7*> [#uses=0]
-@q = constant i7 63 ; <i7*> [#uses=0]
-@c = constant i7 -2 ; <i7*> [#uses=0]
-@d = constant i7 0 ; <i7*> [#uses=0]
-@e = constant i7 -1 ; <i7*> [#uses=0]
-@f = constant i7 1 ; <i7*> [#uses=0]
-@g = constant i7 3 ; <i7*> [#uses=0]
-@r = constant i7 5 ; <i7*> [#uses=0]
-@s = constant i7 5 ; <i7*> [#uses=0]
-@h = constant i7 undef ; <i7*> [#uses=0]
-@i = constant i7 -64 ; <i7*> [#uses=0]
-@j = constant i7 1 ; <i7*> [#uses=0]
-@l = constant i7 -1 ; <i7*> [#uses=0]
-@m2 = constant i7 -1 ; <i7*> [#uses=0]
-@n = constant i7 -2 ; <i7*> [#uses=0]
-@t = constant i7 -2 ; <i7*> [#uses=0]
-@u = constant i7 -64 ; <i7*> [#uses=0]
-@v = constant i7 0 ; <i7*> [#uses=0]
-@w = constant i7 2 ; <i7*> [#uses=0]
-@x = constant i7 1 ; <i7*> [#uses=0]
-@y = constant i7 0 ; <i7*> [#uses=0]
-@o = constant i7 0 ; <i7*> [#uses=0]
-@p = constant i7 -1 ; <i7*> [#uses=0]
diff --git a/test/Integer/a9.ll b/test/Integer/a9.ll
deleted file mode 100644
index 711ec821c295..000000000000
--- a/test/Integer/a9.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t.ll
-; RUN: diff %t.ll %s.out
-
-; test 9 bits
-;
-@b = constant i9 add(i9 511, i9 1)
-@c = constant i9 add(i9 511, i9 511)
-@d = constant i9 add(i9 504, i9 8)
-@e = constant i9 sub(i9 0 , i9 1)
-@f = constant i9 sub(i9 0 , i9 511)
-@g = constant i9 sub(i9 2 , i9 511)
-
-@h = constant i9 shl(i9 1 , i9 9)
-@i = constant i9 shl(i9 1 , i9 8)
-@j = constant i9 lshr(i9 511 , i9 8)
-@l = constant i9 ashr(i9 511 , i9 8)
-
-@n = constant i9 mul(i9 511, i9 2)
-@q = constant i9 sdiv(i9 511, i9 2)
-@r = constant i9 udiv(i9 511, i9 2)
-@s = constant i9 urem(i9 511, i9 -1)
-@t = constant i9 srem(i9 1, i9 510)
-@o = constant i9 trunc( i10 512 to i9 )
-@p = constant i9 trunc( i10 511 to i9 )
-
diff --git a/test/Integer/a9.ll.out b/test/Integer/a9.ll.out
deleted file mode 100644
index 6e38062c4a03..000000000000
--- a/test/Integer/a9.ll.out
+++ /dev/null
@@ -1,19 +0,0 @@
-; ModuleID = '<stdin>'
-
-@b = constant i9 0 ; <i9*> [#uses=0]
-@c = constant i9 -2 ; <i9*> [#uses=0]
-@d = constant i9 0 ; <i9*> [#uses=0]
-@e = constant i9 -1 ; <i9*> [#uses=0]
-@f = constant i9 1 ; <i9*> [#uses=0]
-@g = constant i9 3 ; <i9*> [#uses=0]
-@h = constant i9 undef ; <i9*> [#uses=0]
-@i = constant i9 -256 ; <i9*> [#uses=0]
-@j = constant i9 1 ; <i9*> [#uses=0]
-@l = constant i9 -1 ; <i9*> [#uses=0]
-@n = constant i9 -2 ; <i9*> [#uses=0]
-@q = constant i9 0 ; <i9*> [#uses=0]
-@r = constant i9 255 ; <i9*> [#uses=0]
-@s = constant i9 0 ; <i9*> [#uses=0]
-@t = constant i9 1 ; <i9*> [#uses=0]
-@o = constant i9 0 ; <i9*> [#uses=0]
-@p = constant i9 -1 ; <i9*> [#uses=0]
diff --git a/test/LLVMC/Alias.td b/test/LLVMC/Alias.td
new file mode 100644
index 000000000000..5d37889304bd
--- /dev/null
+++ b/test/LLVMC/Alias.td
@@ -0,0 +1,24 @@
+// Test alias generation.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx %t
+// XFAIL: vg_leak
+
+include "llvm/CompilerDriver/Common.td"
+
+def OptList : OptionList<[
+
+(switch_option "dummy1", (help "none")),
+// CHECK: cl::alias Alias_dummy2
+(alias_option "dummy2", "dummy1")
+]>;
+
+def dummy_tool : Tool<[
+(command "dummy_cmd"),
+(in_language "dummy_lang"),
+(out_language "dummy_lang"),
+(actions (case
+ (switch_on "dummy1"), (forward "dummy1")))
+]>;
+
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/AppendCmdHook.td b/test/LLVMC/AppendCmdHook.td
index 254d5eaf37af..c85f002e6e8b 100644
--- a/test/LLVMC/AppendCmdHook.td
+++ b/test/LLVMC/AppendCmdHook.td
@@ -1,7 +1,7 @@
// Check that hooks can be invoked from 'append_cmd'.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -26,4 +26,4 @@ def dummy_tool : Tool<[
(switch_on "dummy2"), (append_cmd "-arg3 $CALL(MyHook)")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td
index e5d5e9a64cdb..a52b8a8c1990 100644
--- a/test/LLVMC/EmptyCompilationGraph.td
+++ b/test/LLVMC/EmptyCompilationGraph.td
@@ -1,6 +1,6 @@
// Check that the compilation graph can be empty.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td
index 86091db9bdfb..ce0cb824604c 100644
--- a/test/LLVMC/EnvParentheses.td
+++ b/test/LLVMC/EnvParentheses.td
@@ -2,7 +2,7 @@
// http://llvm.org/bugs/show_bug.cgi?id=4157
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: not grep {FOO")));} %t
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -13,6 +13,6 @@ def dummy_tool : Tool<[
(out_language "dummy")
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
def Graph : CompilationGraph<[]>;
diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td
deleted file mode 100644
index d84ea847bf12..000000000000
--- a/test/LLVMC/ExternOptions.td
+++ /dev/null
@@ -1,26 +0,0 @@
-// Check that extern options work.
-// The dummy tool and graph are required to silence warnings.
-// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
-// XFAIL: vg_leak
-
-include "llvm/CompilerDriver/Common.td"
-
-// CHECK: extern cl::opt<bool> AutoGeneratedSwitch_Wall
-
-def OptList : OptionList<[(switch_option "Wall", (extern)),
- (parameter_option "std", (extern)),
- (prefix_list_option "L", (extern))]>;
-
-def dummy_tool : Tool<[
-(command "dummy_cmd"),
-(in_language "dummy"),
-(out_language "dummy"),
-(actions (case
- (switch_on "Wall"), (stop_compilation),
- (not_empty "std"), (stop_compilation),
- (not_empty "L"), (stop_compilation)))
-]>;
-
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td
index 536b96a9758f..99b240e30fb3 100644
--- a/test/LLVMC/ForwardAs.td
+++ b/test/LLVMC/ForwardAs.td
@@ -2,12 +2,12 @@
// http://llvm.org/bugs/show_bug.cgi?id=4159
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
-def OptList : OptionList<[(parameter_option "dummy", (extern))]>;
+def OptList : OptionList<[(parameter_option "dummy", (help "dummmy"))]>;
def dummy_tool : Tool<[
(command "dummy_cmd"),
@@ -18,4 +18,4 @@ def dummy_tool : Tool<[
(not_empty "dummy"), (forward_as "dummy", "unique_name")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/ForwardTransformedValue.td b/test/LLVMC/ForwardTransformedValue.td
index 5e0bf290d1fd..9184ede36101 100644
--- a/test/LLVMC/ForwardTransformedValue.td
+++ b/test/LLVMC/ForwardTransformedValue.td
@@ -2,13 +2,13 @@
// The dummy tool and graph are required to silence warnings.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
-def OptList : OptionList<[(parameter_option "a", (extern)),
- (prefix_list_option "b", (extern))]>;
+def OptList : OptionList<[(parameter_option "a", (help "dummy")),
+ (prefix_list_option "b", (help "dummy"))]>;
// CHECK: std::string HookA
// CHECK: std::string HookB
@@ -18,10 +18,10 @@ def dummy_tool : Tool<[
(in_language "dummy"),
(out_language "dummy"),
(actions (case
- // CHECK: HookA(AutoGeneratedParameter_a
+ // CHECK: HookA(autogenerated::Parameter_a
(not_empty "a"), (forward_transformed_value "a", "HookA"),
- // CHECK: HookB(AutoGeneratedList_b
+ // CHECK: HookB(autogenerated::List_b
(not_empty "b"), (forward_transformed_value "b", "HookB")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/ForwardValue.td b/test/LLVMC/ForwardValue.td
index 4c7a0ee0ec5e..a42a3f06ec3d 100644
--- a/test/LLVMC/ForwardValue.td
+++ b/test/LLVMC/ForwardValue.td
@@ -2,23 +2,23 @@
// The dummy tool and graph are required to silence warnings.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
-def OptList : OptionList<[(parameter_option "a", (extern)),
- (prefix_list_option "b", (extern))]>;
+def OptList : OptionList<[(parameter_option "a", (help "dummy")),
+ (prefix_list_option "b", (help "dummy"))]>;
def dummy_tool : Tool<[
(command "dummy_cmd"),
(in_language "dummy"),
(out_language "dummy"),
(actions (case
- // CHECK: , AutoGeneratedParameter_a));
+ // CHECK: , autogenerated::Parameter_a));
(not_empty "a"), (forward_value "a"),
- // CHECK: B = AutoGeneratedList_b.begin()
+ // CHECK: B = autogenerated::List_b.begin()
(not_empty "b"), (forward_value "b")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td
index 5ff96cd6a88d..bbba2e984599 100644
--- a/test/LLVMC/HookWithArguments.td
+++ b/test/LLVMC/HookWithArguments.td
@@ -1,7 +1,7 @@
// Check that hooks with arguments work.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -17,4 +17,4 @@ def dummy_tool : Tool<[
(out_language "dummy")
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/HookWithInFile.td b/test/LLVMC/HookWithInFile.td
index 9855dbc5bd9a..ed08b5321ccf 100644
--- a/test/LLVMC/HookWithInFile.td
+++ b/test/LLVMC/HookWithInFile.td
@@ -1,7 +1,7 @@
// Check that a hook can be given $INFILE as an argument.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -13,4 +13,4 @@ def dummy_tool : Tool<[
(out_language "dummy")
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/Init.td b/test/LLVMC/Init.td
index 05209bf61aca..c3846797026e 100644
--- a/test/LLVMC/Init.td
+++ b/test/LLVMC/Init.td
@@ -1,7 +1,7 @@
// Check that (init true/false) and (init "str") work.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -22,4 +22,4 @@ def dummy_tool : Tool<[
(not_empty "dummy2"), (forward "dummy2")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/LanguageMap.td b/test/LLVMC/LanguageMap.td
new file mode 100644
index 000000000000..a0502142e6d7
--- /dev/null
+++ b/test/LLVMC/LanguageMap.td
@@ -0,0 +1,29 @@
+// Check that LanguageMap is processed properly.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx %t
+// XFAIL: vg_leak
+
+include "llvm/CompilerDriver/Common.td"
+
+def OptList : OptionList<[
+(switch_option "dummy1", (help "none"))
+]>;
+
+def dummy_tool : Tool<[
+(command "dummy_cmd"),
+(in_language "dummy_lang"),
+(out_language "dummy_lang"),
+(actions (case
+ (switch_on "dummy1"), (forward "dummy1")))
+]>;
+
+def lang_map : LanguageMap<[
+ // CHECK: langMap["dummy"] = "dummy_lang"
+ // CHECK: langMap["DUM"] = "dummy_lang"
+ (lang_to_suffixes "dummy_lang", ["dummy", "DUM"]),
+ // CHECK: langMap["DUM2"] = "dummy_lang_2"
+ (lang_to_suffixes "dummy_lang_2", "DUM2")
+]>;
+
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td
index 73ccb6311f3c..08c753380d47 100644
--- a/test/LLVMC/MultiValuedOption.td
+++ b/test/LLVMC/MultiValuedOption.td
@@ -2,7 +2,7 @@
// The dummy tool and graph are required to silence warnings.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -10,7 +10,7 @@ include "llvm/CompilerDriver/Common.td"
def OptList : OptionList<[
// CHECK: cl::multi_val(2)
(prefix_list_option "foo", (multi_val 2)),
- (parameter_list_option "baz", (multi_val 2), (extern))]>;
+ (parameter_list_option "baz", (multi_val 2))]>;
def dummy_tool : Tool<[
(command "dummy_cmd"),
@@ -21,4 +21,4 @@ def dummy_tool : Tool<[
(not_empty "baz"), (forward "baz")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td
index 86cd6131243a..b3746c03b6cb 100644
--- a/test/LLVMC/MultipleCompilationGraphs.td
+++ b/test/LLVMC/MultipleCompilationGraphs.td
@@ -1,6 +1,6 @@
// Check that multiple compilation graphs are allowed.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/MultiplePluginPriorities.td b/test/LLVMC/MultiplePluginPriorities.td
deleted file mode 100644
index 2fe06450eecb..000000000000
--- a/test/LLVMC/MultiplePluginPriorities.td
+++ /dev/null
@@ -1,17 +0,0 @@
-// Check that multiple plugin priorities are not allowed.
-// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "More than one 'PluginPriority' instance found"
-
-// Disable for Darwin PPC: <rdar://problem/7598390>
-// XFAIL: powerpc-apple-darwin
-
-// Generally XFAIL'ed for now, this is (sometimes?) failing on x86_64-apple-darwin10.
-// RUN: false
-// XFAIL: *
-
-include "llvm/CompilerDriver/Common.td"
-
-def Graph : CompilationGraph<[]>;
-
-def Priority1 : PluginPriority<1>;
-
-def Priority2 : PluginPriority<2>;
diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td
index a80bcfe6ce1c..34b444066350 100644
--- a/test/LLVMC/NoActions.td
+++ b/test/LLVMC/NoActions.td
@@ -1,7 +1,7 @@
// Check that tools without associated actions are accepted.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -13,4 +13,4 @@ def dummy_tool : Tool<[
(out_language "dummy")
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td
index 69df70133307..4182882c451f 100644
--- a/test/LLVMC/NoCompilationGraph.td
+++ b/test/LLVMC/NoCompilationGraph.td
@@ -1,6 +1,6 @@
// Check that the compilation graph is not required.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td
index 37fbc87fdfab..54fa62d1ff04 100644
--- a/test/LLVMC/OneOrMore.td
+++ b/test/LLVMC/OneOrMore.td
@@ -2,7 +2,7 @@
// The dummy tool and graph are required to silence warnings.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -22,4 +22,4 @@ def dummy_tool : Tool<[
(not_empty "baz"), (forward "baz")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td
index c2641be7e645..8019c42634f3 100644
--- a/test/LLVMC/OptionPreprocessor.td
+++ b/test/LLVMC/OptionPreprocessor.td
@@ -1,7 +1,7 @@
// Test for the OptionPreprocessor and related functionality.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
+// RUN: %compile_cxx %t
// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -63,5 +63,5 @@ def dummy : Tool<
(not_empty "foo_l"), (error)))
]>;
-def Graph : CompilationGraph<[Edge<"root", "dummy">]>;
+def Graph : CompilationGraph<[(edge "root", "dummy")]>;
diff --git a/test/LLVMC/OutputSuffixHook.td b/test/LLVMC/OutputSuffixHook.td
index 4ecad2360ba0..1f5ecd1237f3 100644
--- a/test/LLVMC/OutputSuffixHook.td
+++ b/test/LLVMC/OutputSuffixHook.td
@@ -1,8 +1,8 @@
// Check that hooks can be invoked from 'output_suffix'.
// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
// RUN: FileCheck -input-file %t %s
-// RUN: %compile_cxx -fexceptions -x c++ %t
-// XFAIL: *
+// RUN: %compile_cxx %t
+// XFAIL: vg_leak
include "llvm/CompilerDriver/Common.td"
@@ -13,7 +13,7 @@ def OptList : OptionList<[
]>;
def dummy_tool : Tool<[
-(cmd_line "dummy_cmd $INFILE"),
+(command "dummy_cmd"),
(in_language "dummy_lang"),
(out_language "dummy_lang"),
(actions (case
@@ -21,4 +21,4 @@ def dummy_tool : Tool<[
(switch_on "dummy1"), (output_suffix "$CALL(MyHook)")))
]>;
-def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
+def DummyGraph : CompilationGraph<[(edge "root", "dummy_tool")]>;
diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td
index 0388cb0b0f1e..b0f57e97e0d8 100644
--- a/test/LLVMC/TestWarnings.td
+++ b/test/LLVMC/TestWarnings.td
@@ -5,4 +5,4 @@
include "llvm/CompilerDriver/Common.td"
-def OptList : OptionList<[(switch_option "Wall", (extern))]>;
+def OptList : OptionList<[(switch_option "Wall", (help "dummy"))]>;
diff --git a/test/Linker/metadata-a.ll b/test/Linker/metadata-a.ll
new file mode 100644
index 000000000000..5a9d2e40b948
--- /dev/null
+++ b/test/Linker/metadata-a.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-link %s %p/metadata-b.ll -S -o - | FileCheck %s
+
+; CHECK: define void @foo(i32 %a)
+; CHECK: ret void, !attach !0, !also !{i32 %a}
+; CHECK: define void @goo(i32 %b)
+; CHECK: ret void, !attach !1, !and !{i32 %b}
+; CHECK: !0 = metadata !{i32 524334, void (i32)* @foo}
+; CHECK: !1 = metadata !{i32 524334, void (i32)* @goo}
+
+define void @foo(i32 %a) nounwind {
+entry:
+ ret void, !attach !0, !also !{ i32 %a }
+}
+
+!0 = metadata !{i32 524334, void (i32)* @foo}
diff --git a/test/Linker/metadata-b.ll b/test/Linker/metadata-b.ll
new file mode 100644
index 000000000000..ef0270af0756
--- /dev/null
+++ b/test/Linker/metadata-b.ll
@@ -0,0 +1,9 @@
+; This file is for use with metadata-a.ll
+; RUN: true
+
+define void @goo(i32 %b) nounwind {
+entry:
+ ret void, !attach !0, !and !{ i32 %b }
+}
+
+!0 = metadata !{i32 524334, void (i32)* @goo}
diff --git a/test/MC/AsmParser/ARM/arm_instructions.s b/test/MC/AsmParser/ARM/arm_instructions.s
new file mode 100644
index 000000000000..8632cb0cefd8
--- /dev/null
+++ b/test/MC/AsmParser/ARM/arm_instructions.s
@@ -0,0 +1,8 @@
+@ RUN: llvm-mc -triple arm-unknown-unknown %s | FileCheck %s
+
+@ CHECK: nop
+ nop
+
+@ CHECK: nopeq
+ nopeq
+
diff --git a/test/MC/AsmParser/ELF/dg.exp b/test/MC/AsmParser/ELF/dg.exp
new file mode 100644
index 000000000000..ca6aefe9c53d
--- /dev/null
+++ b/test/MC/AsmParser/ELF/dg.exp
@@ -0,0 +1,6 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
+}
+
diff --git a/test/MC/AsmParser/ELF/directive_previous.s b/test/MC/AsmParser/ELF/directive_previous.s
new file mode 100644
index 000000000000..5db1eac03d39
--- /dev/null
+++ b/test/MC/AsmParser/ELF/directive_previous.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s
+
+.bss
+# CHECK: .bss
+
+.text
+# CHECK: .text
+
+.previous
+# CHECK: .bss
+
+.previous
+# CHECK: .text
diff --git a/test/MC/AsmParser/ELF/directive_section.s b/test/MC/AsmParser/ELF/directive_section.s
new file mode 100644
index 000000000000..9531c026e674
--- /dev/null
+++ b/test/MC/AsmParser/ELF/directive_section.s
@@ -0,0 +1,23 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s
+
+ .bss
+# CHECK: .bss
+
+ .data.rel.ro
+# CHECK: .data.rel.ro
+
+ .data.rel
+# CHECK: .data.rel
+
+ .eh_frame
+# CHECK: .eh_frame
+
+ .rodata
+# CHECK: .rodata
+
+ .tbss
+# CHECK: .tbss
+
+ .tdata
+# CHECK: .tdata
+
diff --git a/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s
new file mode 100644
index 000000000000..47bf980894d0
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_32-avx-clmul-encoding.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11]
+ vpclmulhqhqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11]
+ vpclmulhqhqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq $1, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x01]
+ vpclmulhqlqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq $1, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x01]
+ vpclmulhqlqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq $16, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x10]
+ vpclmullqhqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq $16, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x10]
+ vpclmullqhqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq $0, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x00]
+ vpclmullqlqdq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq $0, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x00]
+ vpclmullqlqdq (%eax), %xmm5, %xmm3
+
+// CHECK: vpclmulqdq $17, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0xca,0x11]
+ vpclmulqdq $17, %xmm2, %xmm5, %xmm1
+
+// CHECK: vpclmulqdq $17, (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x51,0x44,0x18,0x11]
+ vpclmulqdq $17, (%eax), %xmm5, %xmm3
+
diff --git a/test/MC/AsmParser/X86/x86_32-avx-encoding.s b/test/MC/AsmParser/X86/x86_32-avx-encoding.s
new file mode 100644
index 000000000000..b7ade6670a01
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_32-avx-encoding.s
@@ -0,0 +1,3241 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vaddss %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0x58,0xd4]
+ vaddss %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulss %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0x59,0xd4]
+ vmulss %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubss %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0x5c,0xd4]
+ vsubss %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivss %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0x5e,0xd4]
+ vdivss %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddsd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0x58,0xd4]
+ vaddsd %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulsd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0x59,0xd4]
+ vmulsd %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubsd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4]
+ vsubsd %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivsd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4]
+ vdivsd %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vaddps %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x58,0xd4]
+ vaddps %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubps %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4]
+ vsubps %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulps %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x59,0xd4]
+ vmulps %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivps %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4]
+ vdivps %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddpd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x58,0xd4]
+ vaddpd %xmm4, %xmm6, %xmm2
+
+// CHECK: vsubpd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4]
+ vsubpd %xmm4, %xmm6, %xmm2
+
+// CHECK: vmulpd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x59,0xd4]
+ vmulpd %xmm4, %xmm6, %xmm2
+
+// CHECK: vdivpd %xmm4, %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4]
+ vdivpd %xmm4, %xmm6, %xmm2
+
+// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
+ vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: vmaxss %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5f,0xf2]
+ vmaxss %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxsd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2]
+ vmaxsd %xmm2, %xmm4, %xmm6
+
+// CHECK: vminss %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5d,0xf2]
+ vminss %xmm2, %xmm4, %xmm6
+
+// CHECK: vminsd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2]
+ vminsd %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc]
+ vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc]
+ vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc]
+ vminss -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
+ vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
+ vmaxps %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
+ vmaxpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vminps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
+ vminps %xmm2, %xmm4, %xmm6
+
+// CHECK: vminpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
+ vminpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
+ vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
+ vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
+ vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
+ vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x54,0xf2]
+ vandps %xmm2, %xmm4, %xmm6
+
+// CHECK: vandpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x54,0xf2]
+ vandpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc]
+ vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc]
+ vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x56,0xf2]
+ vorps %xmm2, %xmm4, %xmm6
+
+// CHECK: vorpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x56,0xf2]
+ vorpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc]
+ vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc]
+ vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x57,0xf2]
+ vxorps %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x57,0xf2]
+ vxorpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc]
+ vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc]
+ vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnps %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd8,0x55,0xf2]
+ vandnps %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnpd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0x55,0xf2]
+ vandnpd %xmm2, %xmm4, %xmm6
+
+// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc]
+ vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
+ vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc]
+ vmovss -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovss %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xea,0x10,0xec]
+ vmovss %xmm4, %xmm2, %xmm5
+
+// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5
+// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc]
+ vmovsd -4(%ebx,%ecx,8), %xmm5
+
+// CHECK: vmovsd %xmm4, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
+ vmovsd %xmm4, %xmm2, %xmm5
+
+// CHECK: vunpckhps %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x15,0xe1]
+ vunpckhps %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x15,0xe1]
+ vunpckhpd %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklps %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe8,0x14,0xe1]
+ vunpcklps %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0xc5,0xe9,0x14,0xe1]
+ vunpcklpd %xmm1, %xmm2, %xmm4
+
+// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc]
+ vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc]
+ vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc]
+ vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc]
+ vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5
+
+// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00]
+ vcmpps $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00]
+ vcmpps $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07]
+ vcmpps $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00]
+ vcmppd $0, %xmm0, %xmm6, %xmm1
+
+// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00]
+ vcmppd $0, (%eax), %xmm6, %xmm1
+
+// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1
+// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07]
+ vcmppd $7, %xmm0, %xmm6, %xmm1
+
+// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08]
+ vshufps $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08]
+ vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08]
+ vshufpd $8, %xmm1, %xmm2, %xmm3
+
+// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08]
+ vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00]
+ vcmpeqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02]
+ vcmpleps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01]
+ vcmpltps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04]
+ vcmpneqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06]
+ vcmpnleps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05]
+ vcmpnltps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07]
+ vcmpordps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03]
+ vcmpunordps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00]
+ vcmpeqpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02]
+ vcmplepd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01]
+ vcmpltpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04]
+ vcmpneqpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06]
+ vcmpnlepd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05]
+ vcmpnltpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07]
+ vcmpordpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03]
+ vcmpunordpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vmovmskps %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
+ vmovmskps %xmm2, %eax
+
+// CHECK: vmovmskpd %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
+ vmovmskpd %xmm2, %eax
+
+// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00]
+ vcmpeqss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02]
+ vcmpless %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01]
+ vcmpltss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04]
+ vcmpneqss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06]
+ vcmpnless %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05]
+ vcmpnltss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07]
+ vcmpordss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03]
+ vcmpunordss %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00]
+ vcmpeqsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02]
+ vcmplesd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01]
+ vcmpltsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04]
+ vcmpneqsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06]
+ vcmpnlesd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05]
+ vcmpnltsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07]
+ vcmpordsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03]
+ vcmpunordsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3
+
+// CHECK: vucomiss %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1]
+ vucomiss %xmm1, %xmm2
+
+// CHECK: vucomiss (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2e,0x10]
+ vucomiss (%eax), %xmm2
+
+// CHECK: vcomiss %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1]
+ vcomiss %xmm1, %xmm2
+
+// CHECK: vcomiss (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x2f,0x10]
+ vcomiss (%eax), %xmm2
+
+// CHECK: vucomisd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1]
+ vucomisd %xmm1, %xmm2
+
+// CHECK: vucomisd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2e,0x10]
+ vucomisd (%eax), %xmm2
+
+// CHECK: vcomisd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1]
+ vcomisd %xmm1, %xmm2
+
+// CHECK: vcomisd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x2f,0x10]
+ vcomisd (%eax), %xmm2
+
+// CHECK: vcvttss2si %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1]
+ vcvttss2si %xmm1, %eax
+
+// CHECK: vcvttss2si (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+ vcvttss2si (%ecx), %eax
+
+// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+ vcvtsi2ss (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
+ vcvtsi2ss (%eax), %xmm1, %xmm2
+
+// CHECK: vcvttsd2si %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1]
+ vcvttsd2si %xmm1, %eax
+
+// CHECK: vcvttsd2si (%ecx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+ vcvttsd2si (%ecx), %eax
+
+// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+ vcvtsi2sd (%eax), %xmm1, %xmm2
+
+// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
+ vcvtsi2sd (%eax), %xmm1, %xmm2
+
+// CHECK: vmovaps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
+ vmovaps (%eax), %xmm2
+
+// CHECK: vmovaps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
+ vmovaps %xmm1, %xmm2
+
+// CHECK: vmovaps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
+ vmovaps %xmm1, (%eax)
+
+// CHECK: vmovapd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
+ vmovapd (%eax), %xmm2
+
+// CHECK: vmovapd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
+ vmovapd %xmm1, %xmm2
+
+// CHECK: vmovapd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
+ vmovapd %xmm1, (%eax)
+
+// CHECK: vmovups (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
+ vmovups (%eax), %xmm2
+
+// CHECK: vmovups %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
+ vmovups %xmm1, %xmm2
+
+// CHECK: vmovups %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
+ vmovups %xmm1, (%eax)
+
+// CHECK: vmovupd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
+ vmovupd (%eax), %xmm2
+
+// CHECK: vmovupd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
+ vmovupd %xmm1, %xmm2
+
+// CHECK: vmovupd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
+ vmovupd %xmm1, (%eax)
+
+// CHECK: vmovlps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
+ vmovlps %xmm1, (%eax)
+
+// CHECK: vmovlps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
+ vmovlps (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
+ vmovlpd %xmm1, (%eax)
+
+// CHECK: vmovlpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
+ vmovlpd (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
+ vmovhps %xmm1, (%eax)
+
+// CHECK: vmovhps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
+ vmovhps (%eax), %xmm2, %xmm3
+
+// CHECK: vmovhpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
+ vmovhpd %xmm1, (%eax)
+
+// CHECK: vmovhpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
+ vmovhpd (%eax), %xmm2, %xmm3
+
+// CHECK: vmovlhps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
+ vmovlhps %xmm1, %xmm2, %xmm3
+
+// CHECK: vmovhlps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
+ vmovhlps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcvtss2sil %xmm1, %eax
+// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
+ vcvtss2si %xmm1, %eax
+
+// CHECK: vcvtss2sil (%eax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+ vcvtss2si (%eax), %ebx
+
+// CHECK: vcvtdq2ps %xmm5, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5]
+ vcvtdq2ps %xmm5, %xmm6
+
+// CHECK: vcvtdq2ps (%eax), %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x30]
+ vcvtdq2ps (%eax), %xmm6
+
+// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2]
+ vcvtsd2ss %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xdb,0x5a,0x30]
+ vcvtsd2ss (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtps2dq %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0xda]
+ vcvtps2dq %xmm2, %xmm3
+
+// CHECK: vcvtps2dq (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5b,0x18]
+ vcvtps2dq (%eax), %xmm3
+
+// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0xf2]
+ vcvtss2sd %xmm2, %xmm4, %xmm6
+
+// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xda,0x5a,0x30]
+ vcvtss2sd (%eax), %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4]
+ vcvtdq2ps %xmm4, %xmm6
+
+// CHECK: vcvtdq2ps (%ecx), %xmm4
+// CHECK: encoding: [0xc5,0xf8,0x5b,0x21]
+ vcvtdq2ps (%ecx), %xmm4
+
+// CHECK: vcvttps2dq %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0xda]
+ vcvttps2dq %xmm2, %xmm3
+
+// CHECK: vcvttps2dq (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x5b,0x18]
+ vcvttps2dq (%eax), %xmm3
+
+// CHECK: vcvtps2pd %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0xda]
+ vcvtps2pd %xmm2, %xmm3
+
+// CHECK: vcvtps2pd (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf8,0x5a,0x18]
+ vcvtps2pd (%eax), %xmm3
+
+// CHECK: vcvtpd2ps %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
+ vcvtpd2ps %xmm2, %xmm3
+
+// CHECK: vsqrtpd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
+ vsqrtpd %xmm1, %xmm2
+
+// CHECK: vsqrtpd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
+ vsqrtpd (%eax), %xmm2
+
+// CHECK: vsqrtps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
+ vsqrtps %xmm1, %xmm2
+
+// CHECK: vsqrtps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
+ vsqrtps (%eax), %xmm2
+
+// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
+ vsqrtsd %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtsd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
+ vsqrtsd (%eax), %xmm2, %xmm3
+
+// CHECK: vsqrtss %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
+ vsqrtss %xmm1, %xmm2, %xmm3
+
+// CHECK: vsqrtss (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x51,0x18]
+ vsqrtss (%eax), %xmm2, %xmm3
+
+// CHECK: vrsqrtps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
+ vrsqrtps %xmm1, %xmm2
+
+// CHECK: vrsqrtps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
+ vrsqrtps (%eax), %xmm2
+
+// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
+ vrsqrtss %xmm1, %xmm2, %xmm3
+
+// CHECK: vrsqrtss (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x52,0x18]
+ vrsqrtss (%eax), %xmm2, %xmm3
+
+// CHECK: vrcpps %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
+ vrcpps %xmm1, %xmm2
+
+// CHECK: vrcpps (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
+ vrcpps (%eax), %xmm2
+
+// CHECK: vrcpss %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
+ vrcpss %xmm1, %xmm2, %xmm3
+
+// CHECK: vrcpss (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xea,0x53,0x18]
+ vrcpss (%eax), %xmm2, %xmm3
+
+// CHECK: vmovntdq %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xe7,0x08]
+ vmovntdq %xmm1, (%eax)
+
+// CHECK: vmovntpd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x2b,0x08]
+ vmovntpd %xmm1, (%eax)
+
+// CHECK: vmovntps %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf8,0x2b,0x08]
+ vmovntps %xmm1, (%eax)
+
+// CHECK: vldmxcsr (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x10]
+ vldmxcsr (%eax)
+
+// CHECK: vstmxcsr (%eax)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x18]
+ vstmxcsr (%eax)
+
+// CHECK: vldmxcsr 3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde]
+ vldmxcsr 0xdeadbeef
+
+// CHECK: vstmxcsr 3735928559
+// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde]
+ vstmxcsr 0xdeadbeef
+
+// CHECK: vpsubb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9]
+ vpsubb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf8,0x18]
+ vpsubb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9]
+ vpsubw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf9,0x18]
+ vpsubw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9]
+ vpsubd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfa,0x18]
+ vpsubd (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9]
+ vpsubq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfb,0x18]
+ vpsubq (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9]
+ vpsubsb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe8,0x18]
+ vpsubsb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9]
+ vpsubsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe9,0x18]
+ vpsubsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9]
+ vpsubusb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd8,0x18]
+ vpsubusb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsubusw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9]
+ vpsubusw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsubusw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd9,0x18]
+ vpsubusw (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9]
+ vpaddb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfc,0x18]
+ vpaddb (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9]
+ vpaddw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfd,0x18]
+ vpaddw (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9]
+ vpaddd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xfe,0x18]
+ vpaddd (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9]
+ vpaddq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd4,0x18]
+ vpaddq (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0xd9]
+ vpaddsb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xec,0x18]
+ vpaddsb (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0xd9]
+ vpaddsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xed,0x18]
+ vpaddsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9]
+ vpaddusb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdc,0x18]
+ vpaddusb (%eax), %xmm2, %xmm3
+
+// CHECK: vpaddusw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9]
+ vpaddusw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpaddusw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdd,0x18]
+ vpaddusw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9]
+ vpmulhuw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhuw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe4,0x18]
+ vpmulhuw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9]
+ vpmulhw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe5,0x18]
+ vpmulhw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmullw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9]
+ vpmullw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmullw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd5,0x18]
+ vpmullw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmuludq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9]
+ vpmuludq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmuludq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf4,0x18]
+ vpmuludq (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9]
+ vpavgb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe0,0x18]
+ vpavgb (%eax), %xmm2, %xmm3
+
+// CHECK: vpavgw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9]
+ vpavgw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpavgw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe3,0x18]
+ vpavgw (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0xd9]
+ vpminsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xea,0x18]
+ vpminsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpminub %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0xd9]
+ vpminub %xmm1, %xmm2, %xmm3
+
+// CHECK: vpminub (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xda,0x18]
+ vpminub (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0xd9]
+ vpmaxsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xee,0x18]
+ vpmaxsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxub %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0xd9]
+ vpmaxub %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaxub (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xde,0x18]
+ vpmaxub (%eax), %xmm2, %xmm3
+
+// CHECK: vpsadbw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9]
+ vpsadbw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsadbw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
+ vpsadbw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9]
+ vpsllw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf1,0x18]
+ vpsllw (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9]
+ vpslld %xmm1, %xmm2, %xmm3
+
+// CHECK: vpslld (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf2,0x18]
+ vpslld (%eax), %xmm2, %xmm3
+
+// CHECK: vpsllq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9]
+ vpsllq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsllq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xf3,0x18]
+ vpsllq (%eax), %xmm2, %xmm3
+
+// CHECK: vpsraw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9]
+ vpsraw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsraw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe1,0x18]
+ vpsraw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrad %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9]
+ vpsrad %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrad (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xe2,0x18]
+ vpsrad (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9]
+ vpsrlw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd1,0x18]
+ vpsrlw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrld %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9]
+ vpsrld %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrld (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd2,0x18]
+ vpsrld (%eax), %xmm2, %xmm3
+
+// CHECK: vpsrlq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9]
+ vpsrlq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsrlq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd3,0x18]
+ vpsrlq (%eax), %xmm2, %xmm3
+
+// CHECK: vpslld $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+ vpslld $10, %xmm2, %xmm3
+
+// CHECK: vpslldq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a]
+ vpslldq $10, %xmm2, %xmm3
+
+// CHECK: vpsllq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a]
+ vpsllq $10, %xmm2, %xmm3
+
+// CHECK: vpsllw $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a]
+ vpsllw $10, %xmm2, %xmm3
+
+// CHECK: vpsrad $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a]
+ vpsrad $10, %xmm2, %xmm3
+
+// CHECK: vpsraw $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a]
+ vpsraw $10, %xmm2, %xmm3
+
+// CHECK: vpsrld $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a]
+ vpsrld $10, %xmm2, %xmm3
+
+// CHECK: vpsrldq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a]
+ vpsrldq $10, %xmm2, %xmm3
+
+// CHECK: vpsrlq $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a]
+ vpsrlq $10, %xmm2, %xmm3
+
+// CHECK: vpsrlw $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a]
+ vpsrlw $10, %xmm2, %xmm3
+
+// CHECK: vpslld $10, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
+ vpslld $10, %xmm2, %xmm3
+
+// CHECK: vpand %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9]
+ vpand %xmm1, %xmm2, %xmm3
+
+// CHECK: vpand (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdb,0x18]
+ vpand (%eax), %xmm2, %xmm3
+
+// CHECK: vpor %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9]
+ vpor %xmm1, %xmm2, %xmm3
+
+// CHECK: vpor (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xeb,0x18]
+ vpor (%eax), %xmm2, %xmm3
+
+// CHECK: vpxor %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0xd9]
+ vpxor %xmm1, %xmm2, %xmm3
+
+// CHECK: vpxor (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xef,0x18]
+ vpxor (%eax), %xmm2, %xmm3
+
+// CHECK: vpandn %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9]
+ vpandn %xmm1, %xmm2, %xmm3
+
+// CHECK: vpandn (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xdf,0x18]
+ vpandn (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0xd9]
+ vpcmpeqb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x74,0x18]
+ vpcmpeqb (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0xd9]
+ vpcmpeqw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x75,0x18]
+ vpcmpeqw (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0xd9]
+ vpcmpeqd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x76,0x18]
+ vpcmpeqd (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0xd9]
+ vpcmpgtb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x64,0x18]
+ vpcmpgtb (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0xd9]
+ vpcmpgtw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x65,0x18]
+ vpcmpgtw (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0xd9]
+ vpcmpgtd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x66,0x18]
+ vpcmpgtd (%eax), %xmm2, %xmm3
+
+// CHECK: vpacksswb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0xd9]
+ vpacksswb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpacksswb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x63,0x18]
+ vpacksswb (%eax), %xmm2, %xmm3
+
+// CHECK: vpackssdw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9]
+ vpackssdw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackssdw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6b,0x18]
+ vpackssdw (%eax), %xmm2, %xmm3
+
+// CHECK: vpackuswb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0xd9]
+ vpackuswb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpackuswb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x67,0x18]
+ vpackuswb (%eax), %xmm2, %xmm3
+
+// CHECK: vpshufd $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04]
+ vpshufd $4, %xmm2, %xmm3
+
+// CHECK: vpshufd $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04]
+ vpshufd $4, (%eax), %xmm3
+
+// CHECK: vpshufhw $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04]
+ vpshufhw $4, %xmm2, %xmm3
+
+// CHECK: vpshufhw $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04]
+ vpshufhw $4, (%eax), %xmm3
+
+// CHECK: vpshuflw $4, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04]
+ vpshuflw $4, %xmm2, %xmm3
+
+// CHECK: vpshuflw $4, (%eax), %xmm3
+// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04]
+ vpshuflw $4, (%eax), %xmm3
+
+// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0xd9]
+ vpunpcklbw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x60,0x18]
+ vpunpcklbw (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0xd9]
+ vpunpcklwd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x61,0x18]
+ vpunpcklwd (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0xd9]
+ vpunpckldq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckldq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x62,0x18]
+ vpunpckldq (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9]
+ vpunpcklqdq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6c,0x18]
+ vpunpcklqdq (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0xd9]
+ vpunpckhbw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x68,0x18]
+ vpunpckhbw (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0xd9]
+ vpunpckhwd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x69,0x18]
+ vpunpckhwd (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9]
+ vpunpckhdq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6a,0x18]
+ vpunpckhdq (%eax), %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9]
+ vpunpckhqdq %xmm1, %xmm2, %xmm3
+
+// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x6d,0x18]
+ vpunpckhqdq (%eax), %xmm2, %xmm3
+
+// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07]
+ vpinsrw $7, %eax, %xmm2, %xmm3
+
+// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07]
+ vpinsrw $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vpextrw $7, %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
+ vpextrw $7, %xmm2, %eax
+
+// CHECK: vpmovmskb %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1]
+ vpmovmskb %xmm1, %eax
+
+// CHECK: vmaskmovdqu %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
+ vmaskmovdqu %xmm1, %xmm2
+
+// CHECK: vmovd %xmm1, %eax
+// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
+ vmovd %xmm1, %eax
+
+// CHECK: vmovd %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
+ vmovd %xmm1, (%eax)
+
+// CHECK: vmovd %eax, %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
+ vmovd %eax, %xmm1
+
+// CHECK: vmovd (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
+ vmovd (%eax), %xmm1
+
+// CHECK: vmovq %xmm1, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
+ vmovq %xmm1, (%eax)
+
+// CHECK: vmovq %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
+ vmovq %xmm1, %xmm2
+
+// CHECK: vmovq (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
+ vmovq (%eax), %xmm1
+
+// CHECK: vcvtpd2dq %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1]
+ vcvtpd2dq %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1]
+ vcvtdq2pd %xmm1, %xmm2
+
+// CHECK: vcvtdq2pd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0xe6,0x10]
+ vcvtdq2pd (%eax), %xmm2
+
+// CHECK: vmovshdup %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0xd1]
+ vmovshdup %xmm1, %xmm2
+
+// CHECK: vmovshdup (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x16,0x10]
+ vmovshdup (%eax), %xmm2
+
+// CHECK: vmovsldup %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0xd1]
+ vmovsldup %xmm1, %xmm2
+
+// CHECK: vmovsldup (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfa,0x12,0x10]
+ vmovsldup (%eax), %xmm2
+
+// CHECK: vmovddup %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0xd1]
+ vmovddup %xmm1, %xmm2
+
+// CHECK: vmovddup (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xfb,0x12,0x10]
+ vmovddup (%eax), %xmm2
+
+// CHECK: vaddsubps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9]
+ vaddsubps %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubps (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf3,0xd0,0x10]
+ vaddsubps (%eax), %xmm1, %xmm2
+
+// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9]
+ vaddsubpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vaddsubpd (%eax), %xmm1, %xmm2
+// CHECK: encoding: [0xc5,0xf1,0xd0,0x10]
+ vaddsubpd (%eax), %xmm1, %xmm2
+
+// CHECK: vhaddps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9]
+ vhaddps %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7c,0x18]
+ vhaddps (%eax), %xmm2, %xmm3
+
+// CHECK: vhaddpd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9]
+ vhaddpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vhaddpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7c,0x18]
+ vhaddpd (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubps %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9]
+ vhsubps %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubps (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xeb,0x7d,0x18]
+ vhsubps (%eax), %xmm2, %xmm3
+
+// CHECK: vhsubpd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9]
+ vhsubpd %xmm1, %xmm2, %xmm3
+
+// CHECK: vhsubpd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
+ vhsubpd (%eax), %xmm2, %xmm3
+
+// CHECK: vpabsb %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1]
+ vpabsb %xmm1, %xmm2
+
+// CHECK: vpabsb (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10]
+ vpabsb (%eax), %xmm2
+
+// CHECK: vpabsw %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1]
+ vpabsw %xmm1, %xmm2
+
+// CHECK: vpabsw (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10]
+ vpabsw (%eax), %xmm2
+
+// CHECK: vpabsd %xmm1, %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1]
+ vpabsd %xmm1, %xmm2
+
+// CHECK: vpabsd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10]
+ vpabsd (%eax), %xmm2
+
+// CHECK: vphaddw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9]
+ vphaddw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18]
+ vphaddw (%eax), %xmm2, %xmm3
+
+// CHECK: vphaddd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9]
+ vphaddd %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18]
+ vphaddd (%eax), %xmm2, %xmm3
+
+// CHECK: vphaddsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9]
+ vphaddsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphaddsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18]
+ vphaddsw (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9]
+ vphsubw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18]
+ vphsubw (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9]
+ vphsubd %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18]
+ vphsubd (%eax), %xmm2, %xmm3
+
+// CHECK: vphsubsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9]
+ vphsubsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vphsubsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18]
+ vphsubsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9]
+ vpmaddubsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18]
+ vpmaddubsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpshufb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9]
+ vpshufb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpshufb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18]
+ vpshufb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9]
+ vpsignb %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18]
+ vpsignb (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9]
+ vpsignw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18]
+ vpsignw (%eax), %xmm2, %xmm3
+
+// CHECK: vpsignd %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9]
+ vpsignd %xmm1, %xmm2, %xmm3
+
+// CHECK: vpsignd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18]
+ vpsignd (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9]
+ vpmulhrsw %xmm1, %xmm2, %xmm3
+
+// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18]
+ vpmulhrsw (%eax), %xmm2, %xmm3
+
+// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07]
+ vpalignr $7, %xmm1, %xmm2, %xmm3
+
+// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07]
+ vpalignr $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07]
+ vroundsd $7, %xmm1, %xmm2, %xmm3
+
+// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07]
+ vroundsd $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07]
+ vroundss $7, %xmm1, %xmm2, %xmm3
+
+// CHECK: vroundss $7, (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07]
+ vroundss $7, (%eax), %xmm2, %xmm3
+
+// CHECK: vroundpd $7, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07]
+ vroundpd $7, %xmm2, %xmm3
+
+// CHECK: vroundpd $7, (%eax), %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07]
+ vroundpd $7, (%eax), %xmm3
+
+// CHECK: vroundps $7, %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07]
+ vroundps $7, %xmm2, %xmm3
+
+// CHECK: vroundps $7, (%eax), %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07]
+ vroundps $7, (%eax), %xmm3
+
+// CHECK: vphminposuw %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda]
+ vphminposuw %xmm2, %xmm3
+
+// CHECK: vphminposuw (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10]
+ vphminposuw (%eax), %xmm2
+
+// CHECK: vpackusdw %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca]
+ vpackusdw %xmm2, %xmm3, %xmm1
+
+// CHECK: vpackusdw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18]
+ vpackusdw (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca]
+ vpcmpeqq %xmm2, %xmm3, %xmm1
+
+// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18]
+ vpcmpeqq (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsb %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca]
+ vpminsb %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminsb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18]
+ vpminsb (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsd %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca]
+ vpminsd %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminsd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18]
+ vpminsd (%eax), %xmm2, %xmm3
+
+// CHECK: vpminud %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca]
+ vpminud %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminud (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18]
+ vpminud (%eax), %xmm2, %xmm3
+
+// CHECK: vpminuw %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca]
+ vpminuw %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminuw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18]
+ vpminuw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca]
+ vpmaxsb %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxsb (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18]
+ vpmaxsb (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca]
+ vpmaxsd %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxsd (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18]
+ vpmaxsd (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxud %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca]
+ vpmaxud %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxud (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18]
+ vpmaxud (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca]
+ vpmaxuw %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxuw (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18]
+ vpmaxuw (%eax), %xmm2, %xmm3
+
+// CHECK: vpmuldq %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca]
+ vpmuldq %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmuldq (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18]
+ vpmuldq (%eax), %xmm2, %xmm3
+
+// CHECK: vpmulld %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca]
+ vpmulld %xmm2, %xmm5, %xmm1
+
+// CHECK: vpmulld (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18]
+ vpmulld (%eax), %xmm5, %xmm3
+
+// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03]
+ vblendps $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vblendps $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03]
+ vblendps $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03]
+ vblendpd $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03]
+ vblendpd $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03]
+ vpblendw $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03]
+ vpblendw $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03]
+ vmpsadbw $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03]
+ vmpsadbw $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03]
+ vdpps $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vdpps $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03]
+ vdpps $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03]
+ vdppd $3, %xmm2, %xmm5, %xmm1
+
+// CHECK: vdppd $3, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03]
+ vdppd $3, (%eax), %xmm5, %xmm1
+
+// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20]
+ vblendvpd %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20]
+ vblendvpd %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vblendvps %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20]
+ vblendvps %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20]
+ vblendvps %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20]
+ vpblendvb %xmm2, %xmm5, %xmm1, %xmm3
+
+// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3
+// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20]
+ vpblendvb %xmm2, (%eax), %xmm1, %xmm3
+
+// CHECK: vpmovsxbw %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea]
+ vpmovsxbw %xmm2, %xmm5
+
+// CHECK: vpmovsxbw (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10]
+ vpmovsxbw (%eax), %xmm2
+
+// CHECK: vpmovsxwd %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea]
+ vpmovsxwd %xmm2, %xmm5
+
+// CHECK: vpmovsxwd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10]
+ vpmovsxwd (%eax), %xmm2
+
+// CHECK: vpmovsxdq %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea]
+ vpmovsxdq %xmm2, %xmm5
+
+// CHECK: vpmovsxdq (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10]
+ vpmovsxdq (%eax), %xmm2
+
+// CHECK: vpmovzxbw %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea]
+ vpmovzxbw %xmm2, %xmm5
+
+// CHECK: vpmovzxbw (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10]
+ vpmovzxbw (%eax), %xmm2
+
+// CHECK: vpmovzxwd %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea]
+ vpmovzxwd %xmm2, %xmm5
+
+// CHECK: vpmovzxwd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10]
+ vpmovzxwd (%eax), %xmm2
+
+// CHECK: vpmovzxdq %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea]
+ vpmovzxdq %xmm2, %xmm5
+
+// CHECK: vpmovzxdq (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10]
+ vpmovzxdq (%eax), %xmm2
+
+// CHECK: vpmovsxbq %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea]
+ vpmovsxbq %xmm2, %xmm5
+
+// CHECK: vpmovsxbq (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10]
+ vpmovsxbq (%eax), %xmm2
+
+// CHECK: vpmovzxbq %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea]
+ vpmovzxbq %xmm2, %xmm5
+
+// CHECK: vpmovzxbq (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10]
+ vpmovzxbq (%eax), %xmm2
+
+// CHECK: vpmovsxbd %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea]
+ vpmovsxbd %xmm2, %xmm5
+
+// CHECK: vpmovsxbd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10]
+ vpmovsxbd (%eax), %xmm2
+
+// CHECK: vpmovsxwq %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea]
+ vpmovsxwq %xmm2, %xmm5
+
+// CHECK: vpmovsxwq (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10]
+ vpmovsxwq (%eax), %xmm2
+
+// CHECK: vpmovzxbd %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea]
+ vpmovzxbd %xmm2, %xmm5
+
+// CHECK: vpmovzxbd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10]
+ vpmovzxbd (%eax), %xmm2
+
+// CHECK: vpmovzxwq %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea]
+ vpmovzxwq %xmm2, %xmm5
+
+// CHECK: vpmovzxwq (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10]
+ vpmovzxwq (%eax), %xmm2
+
+// CHECK: vpextrw $7, %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
+ vpextrw $7, %xmm2, %eax
+
+// CHECK: vpextrw $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07]
+ vpextrw $7, %xmm2, (%eax)
+
+// CHECK: vpextrd $7, %xmm2, %eax
+// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07]
+ vpextrd $7, %xmm2, %eax
+
+// CHECK: vpextrd $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07]
+ vpextrd $7, %xmm2, (%eax)
+
+// CHECK: vpextrb $7, %xmm2, %eax
+// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07]
+ vpextrb $7, %xmm2, %eax
+
+// CHECK: vpextrb $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07]
+ vpextrb $7, %xmm2, (%eax)
+
+// CHECK: vextractps $7, %xmm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07]
+ vextractps $7, %xmm2, (%eax)
+
+// CHECK: vextractps $7, %xmm2, %eax
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07]
+ vextractps $7, %xmm2, %eax
+
+// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07]
+ vpinsrw $7, %eax, %xmm2, %xmm5
+
+// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07]
+ vpinsrw $7, (%eax), %xmm2, %xmm5
+
+// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07]
+ vpinsrb $7, %eax, %xmm2, %xmm5
+
+// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07]
+ vpinsrb $7, (%eax), %xmm2, %xmm5
+
+// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07]
+ vpinsrd $7, %eax, %xmm2, %xmm5
+
+// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07]
+ vpinsrd $7, (%eax), %xmm2, %xmm5
+
+// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07]
+ vinsertps $7, %xmm2, %xmm5, %xmm1
+
+// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07]
+ vinsertps $7, (%eax), %xmm5, %xmm1
+
+// CHECK: vptest %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea]
+ vptest %xmm2, %xmm5
+
+// CHECK: vptest (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10]
+ vptest (%eax), %xmm2
+
+// CHECK: vmovntdqa (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10]
+ vmovntdqa (%eax), %xmm2
+
+// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca]
+ vpcmpgtq %xmm2, %xmm5, %xmm1
+
+// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18]
+ vpcmpgtq (%eax), %xmm5, %xmm3
+
+// CHECK: vpcmpistrm $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07]
+ vpcmpistrm $7, %xmm2, %xmm5
+
+// CHECK: vpcmpistrm $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07]
+ vpcmpistrm $7, (%eax), %xmm5
+
+// CHECK: vpcmpestrm $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07]
+ vpcmpestrm $7, %xmm2, %xmm5
+
+// CHECK: vpcmpestrm $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07]
+ vpcmpestrm $7, (%eax), %xmm5
+
+// CHECK: vpcmpistri $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07]
+ vpcmpistri $7, %xmm2, %xmm5
+
+// CHECK: vpcmpistri $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07]
+ vpcmpistri $7, (%eax), %xmm5
+
+// CHECK: vpcmpestri $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07]
+ vpcmpestri $7, %xmm2, %xmm5
+
+// CHECK: vpcmpestri $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07]
+ vpcmpestri $7, (%eax), %xmm5
+
+// CHECK: vaesimc %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea]
+ vaesimc %xmm2, %xmm5
+
+// CHECK: vaesimc (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10]
+ vaesimc (%eax), %xmm2
+
+// CHECK: vaesenc %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca]
+ vaesenc %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesenc (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18]
+ vaesenc (%eax), %xmm5, %xmm3
+
+// CHECK: vaesenclast %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca]
+ vaesenclast %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesenclast (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18]
+ vaesenclast (%eax), %xmm5, %xmm3
+
+// CHECK: vaesdec %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca]
+ vaesdec %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesdec (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18]
+ vaesdec (%eax), %xmm5, %xmm3
+
+// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca]
+ vaesdeclast %xmm2, %xmm5, %xmm1
+
+// CHECK: vaesdeclast (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18]
+ vaesdeclast (%eax), %xmm5, %xmm3
+
+// CHECK: vaeskeygenassist $7, %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07]
+ vaeskeygenassist $7, %xmm2, %xmm5
+
+// CHECK: vaeskeygenassist $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07]
+ vaeskeygenassist $7, (%eax), %xmm5
+
+// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08]
+ vcmpeq_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09]
+ vcmpngeps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a]
+ vcmpngtps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b]
+ vcmpfalseps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c]
+ vcmpneq_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d]
+ vcmpgeps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e]
+ vcmpgtps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f]
+ vcmptrueps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10]
+ vcmpeq_osps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11]
+ vcmplt_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12]
+ vcmple_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13]
+ vcmpunord_sps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14]
+ vcmpneq_usps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15]
+ vcmpnlt_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16]
+ vcmpnle_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17]
+ vcmpord_sps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18]
+ vcmpeq_usps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19]
+ vcmpnge_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a]
+ vcmpngt_uqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b]
+ vcmpfalse_osps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c]
+ vcmpneq_osps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d]
+ vcmpge_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e]
+ vcmpgt_oqps %xmm1, %xmm2, %xmm3
+
+// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f]
+ vcmptrue_usps %xmm1, %xmm2, %xmm3
+
+// CHECK: vmovaps (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x28,0x10]
+ vmovaps (%eax), %ymm2
+
+// CHECK: vmovaps %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x28,0xd1]
+ vmovaps %ymm1, %ymm2
+
+// CHECK: vmovaps %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfc,0x29,0x08]
+ vmovaps %ymm1, (%eax)
+
+// CHECK: vmovapd (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x28,0x10]
+ vmovapd (%eax), %ymm2
+
+// CHECK: vmovapd %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x28,0xd1]
+ vmovapd %ymm1, %ymm2
+
+// CHECK: vmovapd %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x29,0x08]
+ vmovapd %ymm1, (%eax)
+
+// CHECK: vmovups (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x10,0x10]
+ vmovups (%eax), %ymm2
+
+// CHECK: vmovups %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x10,0xd1]
+ vmovups %ymm1, %ymm2
+
+// CHECK: vmovups %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfc,0x11,0x08]
+ vmovups %ymm1, (%eax)
+
+// CHECK: vmovupd (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x10,0x10]
+ vmovupd (%eax), %ymm2
+
+// CHECK: vmovupd %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x10,0xd1]
+ vmovupd %ymm1, %ymm2
+
+// CHECK: vmovupd %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x11,0x08]
+ vmovupd %ymm1, (%eax)
+
+// CHECK: vunpckhps %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xec,0x15,0xe1]
+ vunpckhps %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xed,0x15,0xe1]
+ vunpckhpd %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpcklps %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xec,0x14,0xe1]
+ vunpcklps %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4
+// CHECK: encoding: [0xc5,0xed,0x14,0xe1]
+ vunpcklpd %ymm1, %ymm2, %ymm4
+
+// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc]
+ vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc]
+ vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc]
+ vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc]
+ vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vmovntdq %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0xe7,0x08]
+ vmovntdq %ymm1, (%eax)
+
+// CHECK: vmovntpd %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x2b,0x08]
+ vmovntpd %ymm1, (%eax)
+
+// CHECK: vmovntps %ymm1, (%eax)
+// CHECK: encoding: [0xc5,0xfc,0x2b,0x08]
+ vmovntps %ymm1, (%eax)
+
+// CHECK: vmovmskps %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
+ vmovmskps %xmm2, %eax
+
+// CHECK: vmovmskpd %xmm2, %eax
+// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
+ vmovmskpd %xmm2, %eax
+
+// CHECK: vmaxps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2]
+ vmaxps %ymm2, %ymm4, %ymm6
+
+// CHECK: vmaxpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2]
+ vmaxpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vminps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2]
+ vminps %ymm2, %ymm4, %ymm6
+
+// CHECK: vminpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2]
+ vminpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vsubps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2]
+ vsubps %ymm2, %ymm4, %ymm6
+
+// CHECK: vsubpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2]
+ vsubpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vdivps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2]
+ vdivps %ymm2, %ymm4, %ymm6
+
+// CHECK: vdivpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2]
+ vdivpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vaddps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x58,0xf2]
+ vaddps %ymm2, %ymm4, %ymm6
+
+// CHECK: vaddpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x58,0xf2]
+ vaddpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vmulps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x59,0xf2]
+ vmulps %ymm2, %ymm4, %ymm6
+
+// CHECK: vmulpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x59,0xf2]
+ vmulpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vmaxps (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
+ vmaxps (%eax), %ymm4, %ymm6
+
+// CHECK: vmaxpd (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
+ vmaxpd (%eax), %ymm4, %ymm6
+
+// CHECK: vminps (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
+ vminps (%eax), %ymm4, %ymm6
+
+// CHECK: vminpd (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
+ vminpd (%eax), %ymm4, %ymm6
+
+// CHECK: vsubps (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
+ vsubps (%eax), %ymm4, %ymm6
+
+// CHECK: vsubpd (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
+ vsubpd (%eax), %ymm4, %ymm6
+
+// CHECK: vdivps (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
+ vdivps (%eax), %ymm4, %ymm6
+
+// CHECK: vdivpd (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
+ vdivpd (%eax), %ymm4, %ymm6
+
+// CHECK: vaddps (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
+ vaddps (%eax), %ymm4, %ymm6
+
+// CHECK: vaddpd (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
+ vaddpd (%eax), %ymm4, %ymm6
+
+// CHECK: vmulps (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
+ vmulps (%eax), %ymm4, %ymm6
+
+// CHECK: vmulpd (%eax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
+ vmulpd (%eax), %ymm4, %ymm6
+
+// CHECK: vsqrtpd %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x51,0xd1]
+ vsqrtpd %ymm1, %ymm2
+
+// CHECK: vsqrtpd (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x51,0x10]
+ vsqrtpd (%eax), %ymm2
+
+// CHECK: vsqrtps %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x51,0xd1]
+ vsqrtps %ymm1, %ymm2
+
+// CHECK: vsqrtps (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x51,0x10]
+ vsqrtps (%eax), %ymm2
+
+// CHECK: vrsqrtps %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x52,0xd1]
+ vrsqrtps %ymm1, %ymm2
+
+// CHECK: vrsqrtps (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x52,0x10]
+ vrsqrtps (%eax), %ymm2
+
+// CHECK: vrcpps %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x53,0xd1]
+ vrcpps %ymm1, %ymm2
+
+// CHECK: vrcpps (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x53,0x10]
+ vrcpps (%eax), %ymm2
+
+// CHECK: vandps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x54,0xf2]
+ vandps %ymm2, %ymm4, %ymm6
+
+// CHECK: vandpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x54,0xf2]
+ vandpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc]
+ vandps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc]
+ vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vorps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x56,0xf2]
+ vorps %ymm2, %ymm4, %ymm6
+
+// CHECK: vorpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x56,0xf2]
+ vorpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc]
+ vorps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc]
+ vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vxorps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x57,0xf2]
+ vxorps %ymm2, %ymm4, %ymm6
+
+// CHECK: vxorpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x57,0xf2]
+ vxorpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc]
+ vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc]
+ vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vandnps %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x55,0xf2]
+ vandnps %ymm2, %ymm4, %ymm6
+
+// CHECK: vandnpd %ymm2, %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x55,0xf2]
+ vandnpd %ymm2, %ymm4, %ymm6
+
+// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc]
+ vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc]
+ vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5
+
+// CHECK: vcvtps2pd %xmm3, %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3]
+ vcvtps2pd %xmm3, %ymm2
+
+// CHECK: vcvtps2pd (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5a,0x10]
+ vcvtps2pd (%eax), %ymm2
+
+// CHECK: vcvtdq2pd %xmm3, %ymm2
+// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3]
+ vcvtdq2pd %xmm3, %ymm2
+
+// CHECK: vcvtdq2pd (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0xe6,0x10]
+ vcvtdq2pd (%eax), %ymm2
+
+// CHECK: vcvtdq2ps %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfc,0x5b,0xea]
+ vcvtdq2ps %ymm2, %ymm5
+
+// CHECK: vcvtdq2ps (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfc,0x5b,0x10]
+ vcvtdq2ps (%eax), %ymm2
+
+// CHECK: vcvtps2dq %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x5b,0xea]
+ vcvtps2dq %ymm2, %ymm5
+
+// CHECK: vcvtps2dq (%eax), %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x5b,0x28]
+ vcvtps2dq (%eax), %ymm5
+
+// CHECK: vcvttps2dq %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x5b,0xea]
+ vcvttps2dq %ymm2, %ymm5
+
+// CHECK: vcvttps2dq (%eax), %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x5b,0x28]
+ vcvttps2dq (%eax), %ymm5
+
+// CHECK: vcvttpd2dq %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
+ vcvttpd2dq %xmm1, %xmm5
+
+// CHECK: vcvttpd2dq %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xfd,0xe6,0xea]
+ vcvttpd2dq %ymm2, %xmm5
+
+// CHECK: vcvttpd2dqx %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
+ vcvttpd2dqx %xmm1, %xmm5
+
+// CHECK: vcvttpd2dqx (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0xe6,0x08]
+ vcvttpd2dqx (%eax), %xmm1
+
+// CHECK: vcvttpd2dqy %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xfd,0xe6,0xca]
+ vcvttpd2dqy %ymm2, %xmm1
+
+// CHECK: vcvttpd2dqy (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfd,0xe6,0x08]
+ vcvttpd2dqy (%eax), %xmm1
+
+// CHECK: vcvtpd2ps %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xfd,0x5a,0xea]
+ vcvtpd2ps %ymm2, %xmm5
+
+// CHECK: vcvtpd2psx %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9]
+ vcvtpd2psx %xmm1, %xmm5
+
+// CHECK: vcvtpd2psx (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xf9,0x5a,0x08]
+ vcvtpd2psx (%eax), %xmm1
+
+// CHECK: vcvtpd2psy %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xfd,0x5a,0xca]
+ vcvtpd2psy %ymm2, %xmm1
+
+// CHECK: vcvtpd2psy (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfd,0x5a,0x08]
+ vcvtpd2psy (%eax), %xmm1
+
+// CHECK: vcvtpd2dq %ymm2, %xmm5
+// CHECK: encoding: [0xc5,0xff,0xe6,0xea]
+ vcvtpd2dq %ymm2, %xmm5
+
+// CHECK: vcvtpd2dqy %ymm2, %xmm1
+// CHECK: encoding: [0xc5,0xff,0xe6,0xca]
+ vcvtpd2dqy %ymm2, %xmm1
+
+// CHECK: vcvtpd2dqy (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xff,0xe6,0x08]
+ vcvtpd2dqy (%eax), %xmm1
+
+// CHECK: vcvtpd2dqx %xmm1, %xmm5
+// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9]
+ vcvtpd2dqx %xmm1, %xmm5
+
+// CHECK: vcvtpd2dqx (%eax), %xmm1
+// CHECK: encoding: [0xc5,0xfb,0xe6,0x08]
+ vcvtpd2dqx (%eax), %xmm1
+
+// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00]
+ vcmpeqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02]
+ vcmpleps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01]
+ vcmpltps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $4, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04]
+ vcmpneqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06]
+ vcmpnleps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05]
+ vcmpnltps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07]
+ vcmpordps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03]
+ vcmpunordps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
+// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2
+
+// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00]
+ vcmpeqpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02]
+ vcmplepd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01]
+ vcmpltpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04]
+ vcmpneqpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06]
+ vcmpnlepd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05]
+ vcmpnltpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07]
+ vcmpordpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03]
+ vcmpunordpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00]
+ vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02]
+ vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01]
+ vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04]
+ vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06]
+ vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05]
+ vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
+// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2
+
+// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03]
+ vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3
+
+// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08]
+ vcmpeq_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09]
+ vcmpngeps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a]
+ vcmpngtps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b]
+ vcmpfalseps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c]
+ vcmpneq_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d]
+ vcmpgeps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e]
+ vcmpgtps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f]
+ vcmptrueps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $16, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10]
+ vcmpeq_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11]
+ vcmplt_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12]
+ vcmple_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13]
+ vcmpunord_sps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14]
+ vcmpneq_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15]
+ vcmpnlt_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16]
+ vcmpnle_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17]
+ vcmpord_sps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18]
+ vcmpeq_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19]
+ vcmpnge_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a]
+ vcmpngt_uqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b]
+ vcmpfalse_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c]
+ vcmpneq_osps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d]
+ vcmpge_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e]
+ vcmpgt_oqps %ymm1, %ymm2, %ymm3
+
+// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f]
+ vcmptrue_usps %ymm1, %ymm2, %ymm3
+
+// CHECK: vaddsubps %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0xd0,0xd9]
+ vaddsubps %ymm1, %ymm2, %ymm3
+
+// CHECK: vaddsubps (%eax), %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xf7,0xd0,0x10]
+ vaddsubps (%eax), %ymm1, %ymm2
+
+// CHECK: vaddsubpd %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0xd0,0xd9]
+ vaddsubpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vaddsubpd (%eax), %ymm1, %ymm2
+// CHECK: encoding: [0xc5,0xf5,0xd0,0x10]
+ vaddsubpd (%eax), %ymm1, %ymm2
+
+// CHECK: vhaddps %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7c,0xd9]
+ vhaddps %ymm1, %ymm2, %ymm3
+
+// CHECK: vhaddps (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7c,0x18]
+ vhaddps (%eax), %ymm2, %ymm3
+
+// CHECK: vhaddpd %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7c,0xd9]
+ vhaddpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vhaddpd (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7c,0x18]
+ vhaddpd (%eax), %ymm2, %ymm3
+
+// CHECK: vhsubps %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7d,0xd9]
+ vhsubps %ymm1, %ymm2, %ymm3
+
+// CHECK: vhsubps (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xef,0x7d,0x18]
+ vhsubps (%eax), %ymm2, %ymm3
+
+// CHECK: vhsubpd %ymm1, %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7d,0xd9]
+ vhsubpd %ymm1, %ymm2, %ymm3
+
+// CHECK: vhsubpd (%eax), %ymm2, %ymm3
+// CHECK: encoding: [0xc5,0xed,0x7d,0x18]
+ vhsubpd (%eax), %ymm2, %ymm3
+
+// CHECK: vblendps $3, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0xca,0x03]
+ vblendps $3, %ymm2, %ymm5, %ymm1
+
+// CHECK: vblendps $3, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0c,0x08,0x03]
+ vblendps $3, (%eax), %ymm5, %ymm1
+
+// CHECK: vblendpd $3, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0xca,0x03]
+ vblendpd $3, %ymm2, %ymm5, %ymm1
+
+// CHECK: vblendpd $3, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x0d,0x08,0x03]
+ vblendpd $3, (%eax), %ymm5, %ymm1
+
+// CHECK: vdpps $3, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0xca,0x03]
+ vdpps $3, %ymm2, %ymm5, %ymm1
+
+// CHECK: vdpps $3, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x40,0x08,0x03]
+ vdpps $3, (%eax), %ymm5, %ymm1
+
+// CHECK: vbroadcastf128 (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x1a,0x10]
+ vbroadcastf128 (%eax), %ymm2
+
+// CHECK: vbroadcastsd (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x19,0x10]
+ vbroadcastsd (%eax), %ymm2
+
+// CHECK: vbroadcastss (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x18,0x10]
+ vbroadcastss (%eax), %xmm2
+
+// CHECK: vbroadcastss (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x18,0x10]
+ vbroadcastss (%eax), %ymm2
+
+// CHECK: vinsertf128 $7, %xmm2, %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0xea,0x07]
+ vinsertf128 $7, %xmm2, %ymm2, %ymm5
+
+// CHECK: vinsertf128 $7, (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x6d,0x18,0x28,0x07]
+ vinsertf128 $7, (%eax), %ymm2, %ymm5
+
+// CHECK: vextractf128 $7, %ymm2, %xmm2
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0xd2,0x07]
+ vextractf128 $7, %ymm2, %xmm2
+
+// CHECK: vextractf128 $7, %ymm2, (%eax)
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x19,0x10,0x07]
+ vextractf128 $7, %ymm2, (%eax)
+
+// CHECK: vmaskmovpd %xmm2, %xmm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x51,0x2f,0x10]
+ vmaskmovpd %xmm2, %xmm5, (%eax)
+
+// CHECK: vmaskmovpd %ymm2, %ymm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x55,0x2f,0x10]
+ vmaskmovpd %ymm2, %ymm5, (%eax)
+
+// CHECK: vmaskmovpd (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2d,0x28]
+ vmaskmovpd (%eax), %xmm2, %xmm5
+
+// CHECK: vmaskmovpd (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x6d,0x2d,0x28]
+ vmaskmovpd (%eax), %ymm2, %ymm5
+
+// CHECK: vmaskmovps %xmm2, %xmm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x51,0x2e,0x10]
+ vmaskmovps %xmm2, %xmm5, (%eax)
+
+// CHECK: vmaskmovps %ymm2, %ymm5, (%eax)
+// CHECK: encoding: [0xc4,0xe2,0x55,0x2e,0x10]
+ vmaskmovps %ymm2, %ymm5, (%eax)
+
+// CHECK: vmaskmovps (%eax), %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2c,0x28]
+ vmaskmovps (%eax), %xmm2, %xmm5
+
+// CHECK: vmaskmovps (%eax), %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x6d,0x2c,0x28]
+ vmaskmovps (%eax), %ymm2, %ymm5
+
+// CHECK: vpermilps $7, %xmm1, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0xe9,0x07]
+ vpermilps $7, %xmm1, %xmm5
+
+// CHECK: vpermilps $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0xcd,0x07]
+ vpermilps $7, %ymm5, %ymm1
+
+// CHECK: vpermilps $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x04,0x28,0x07]
+ vpermilps $7, (%eax), %xmm5
+
+// CHECK: vpermilps $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x04,0x28,0x07]
+ vpermilps $7, (%eax), %ymm5
+
+// CHECK: vpermilps %xmm1, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0xc9]
+ vpermilps %xmm1, %xmm5, %xmm1
+
+// CHECK: vpermilps %ymm1, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0xc9]
+ vpermilps %ymm1, %ymm5, %ymm1
+
+// CHECK: vpermilps (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0c,0x18]
+ vpermilps (%eax), %xmm5, %xmm3
+
+// CHECK: vpermilps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0c,0x08]
+ vpermilps (%eax), %ymm5, %ymm1
+
+// CHECK: vpermilpd $7, %xmm1, %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0xe9,0x07]
+ vpermilpd $7, %xmm1, %xmm5
+
+// CHECK: vpermilpd $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0xcd,0x07]
+ vpermilpd $7, %ymm5, %ymm1
+
+// CHECK: vpermilpd $7, (%eax), %xmm5
+// CHECK: encoding: [0xc4,0xe3,0x79,0x05,0x28,0x07]
+ vpermilpd $7, (%eax), %xmm5
+
+// CHECK: vpermilpd $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x05,0x28,0x07]
+ vpermilpd $7, (%eax), %ymm5
+
+// CHECK: vpermilpd %xmm1, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0xc9]
+ vpermilpd %xmm1, %xmm5, %xmm1
+
+// CHECK: vpermilpd %ymm1, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0xc9]
+ vpermilpd %ymm1, %ymm5, %ymm1
+
+// CHECK: vpermilpd (%eax), %xmm5, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x51,0x0d,0x18]
+ vpermilpd (%eax), %xmm5, %xmm3
+
+// CHECK: vpermilpd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x0d,0x08]
+ vpermilpd (%eax), %ymm5, %ymm1
+
+// CHECK: vperm2f128 $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0xca,0x07]
+ vperm2f128 $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vperm2f128 $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x55,0x06,0x08,0x07]
+ vperm2f128 $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vzeroall
+// CHECK: encoding: [0xc5,0xfc,0x77]
+ vzeroall
+
+// CHECK: vzeroupper
+// CHECK: encoding: [0xc5,0xf8,0x77]
+ vzeroupper
+
+// CHECK: vcvtsd2si %xmm4, %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0xcc]
+ vcvtsd2si %xmm4, %ecx
+
+// CHECK: vcvtsd2si (%ecx), %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
+ vcvtsd2si (%ecx), %ecx
+
+// CHECK: vcvtsi2sdl (%ebp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x7d,0x00]
+ vcvtsi2sdl (%ebp), %xmm0, %xmm7
+
+// CHECK: vcvtsi2sdl (%esp), %xmm0, %xmm7
+// CHECK: encoding: [0xc5,0xfb,0x2a,0x3c,0x24]
+ vcvtsi2sdl (%esp), %xmm0, %xmm7
+
+// CHECK: vlddqu (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xff,0xf0,0x10]
+ vlddqu (%eax), %ymm2
+
+// CHECK: vmovddup %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xff,0x12,0xea]
+ vmovddup %ymm2, %ymm5
+
+// CHECK: vmovddup (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xff,0x12,0x10]
+ vmovddup (%eax), %ymm2
+
+// CHECK: vmovdqa %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfd,0x6f,0xea]
+ vmovdqa %ymm2, %ymm5
+
+// CHECK: vmovdqa %ymm2, (%eax)
+// CHECK: encoding: [0xc5,0xfd,0x7f,0x10]
+ vmovdqa %ymm2, (%eax)
+
+// CHECK: vmovdqa (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfd,0x6f,0x10]
+ vmovdqa (%eax), %ymm2
+
+// CHECK: vmovdqu %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x6f,0xea]
+ vmovdqu %ymm2, %ymm5
+
+// CHECK: vmovdqu %ymm2, (%eax)
+// CHECK: encoding: [0xc5,0xfe,0x7f,0x10]
+ vmovdqu %ymm2, (%eax)
+
+// CHECK: vmovdqu (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x6f,0x10]
+ vmovdqu (%eax), %ymm2
+
+// CHECK: vmovshdup %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x16,0xea]
+ vmovshdup %ymm2, %ymm5
+
+// CHECK: vmovshdup (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x16,0x10]
+ vmovshdup (%eax), %ymm2
+
+// CHECK: vmovsldup %ymm2, %ymm5
+// CHECK: encoding: [0xc5,0xfe,0x12,0xea]
+ vmovsldup %ymm2, %ymm5
+
+// CHECK: vmovsldup (%eax), %ymm2
+// CHECK: encoding: [0xc5,0xfe,0x12,0x10]
+ vmovsldup (%eax), %ymm2
+
+// CHECK: vptest %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0xea]
+ vptest %ymm2, %ymm5
+
+// CHECK: vptest (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x17,0x10]
+ vptest (%eax), %ymm2
+
+// CHECK: vroundpd $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0xcd,0x07]
+ vroundpd $7, %ymm5, %ymm1
+
+// CHECK: vroundpd $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x09,0x28,0x07]
+ vroundpd $7, (%eax), %ymm5
+
+// CHECK: vroundps $7, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0xcd,0x07]
+ vroundps $7, %ymm5, %ymm1
+
+// CHECK: vroundps $7, (%eax), %ymm5
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x08,0x28,0x07]
+ vroundps $7, (%eax), %ymm5
+
+// CHECK: vshufpd $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd5,0xc6,0xca,0x07]
+ vshufpd $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vshufpd $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd5,0xc6,0x08,0x07]
+ vshufpd $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vshufps $7, %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd4,0xc6,0xca,0x07]
+ vshufps $7, %ymm2, %ymm5, %ymm1
+
+// CHECK: vshufps $7, (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc5,0xd4,0xc6,0x08,0x07]
+ vshufps $7, (%eax), %ymm5, %ymm1
+
+// CHECK: vtestpd %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0xea]
+ vtestpd %xmm2, %xmm5
+
+// CHECK: vtestpd %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0xea]
+ vtestpd %ymm2, %ymm5
+
+// CHECK: vtestpd (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0f,0x10]
+ vtestpd (%eax), %xmm2
+
+// CHECK: vtestpd (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0f,0x10]
+ vtestpd (%eax), %ymm2
+
+// CHECK: vtestps %xmm2, %xmm5
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0xea]
+ vtestps %xmm2, %xmm5
+
+// CHECK: vtestps %ymm2, %ymm5
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0xea]
+ vtestps %ymm2, %ymm5
+
+// CHECK: vtestps (%eax), %xmm2
+// CHECK: encoding: [0xc4,0xe2,0x79,0x0e,0x10]
+ vtestps (%eax), %xmm2
+
+// CHECK: vtestps (%eax), %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7d,0x0e,0x10]
+ vtestps (%eax), %ymm2
+
+// CHECK: vblendvpd %ymm0, 57005(%eax,%eiz), %ymm1, %ymm2
+// CHECK: encoding: [0xc4,0xe3,0x75,0x4b,0x94,0x20,0xad,0xde,0x00,0x00,0x00]
+ vblendvpd %ymm0, 0xdead(%eax,%eiz), %ymm1, %ymm2
+
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index ebafb11061ef..ef774239ffe8 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -10047,2882 +10047,23 @@
// CHECK: encoding: [0xde,0x1d,0xed,0x7e,0x00,0x00]
ficomps 32493
-// CHECK: vaddss %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0x58,0xd4]
- vaddss %xmm4, %xmm6, %xmm2
+// CHECK: movl 57005(,%eiz), %ebx
+// CHECK: encoding: [0x8b,0x1c,0x25,0xad,0xde,0x00,0x00]
+ movl 57005(,%eiz), %ebx
-// CHECK: vmulss %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0x59,0xd4]
- vmulss %xmm4, %xmm6, %xmm2
+// CHECK: movl 48879(,%eiz), %eax
+// CHECK: encoding: [0x8b,0x04,0x25,0xef,0xbe,0x00,0x00]
+ movl 48879(,%eiz), %eax
-// CHECK: vsubss %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0x5c,0xd4]
- vsubss %xmm4, %xmm6, %xmm2
+// CHECK: movl -4(,%eiz,8), %eax
+// CHECK: encoding: [0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff]
+ movl -4(,%eiz,8), %eax
-// CHECK: vdivss %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0x5e,0xd4]
- vdivss %xmm4, %xmm6, %xmm2
+// CHECK: movl (%ecx,%eiz), %eax
+// CHECK: encoding: [0x8b,0x04,0x21]
+ movl (%ecx,%eiz), %eax
-// CHECK: vaddsd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0x58,0xd4]
- vaddsd %xmm4, %xmm6, %xmm2
-
-// CHECK: vmulsd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0x59,0xd4]
- vmulsd %xmm4, %xmm6, %xmm2
-
-// CHECK: vsubsd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0x5c,0xd4]
- vsubsd %xmm4, %xmm6, %xmm2
-
-// CHECK: vdivsd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0x5e,0xd4]
- vdivsd %xmm4, %xmm6, %xmm2
-
-// CHECK: vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vaddss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vsubss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vmulss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vdivss 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vaddsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vsubsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vmulsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vdivsd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vaddps %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x58,0xd4]
- vaddps %xmm4, %xmm6, %xmm2
-
-// CHECK: vsubps %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x5c,0xd4]
- vsubps %xmm4, %xmm6, %xmm2
-
-// CHECK: vmulps %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x59,0xd4]
- vmulps %xmm4, %xmm6, %xmm2
-
-// CHECK: vdivps %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0x5e,0xd4]
- vdivps %xmm4, %xmm6, %xmm2
-
-// CHECK: vaddpd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x58,0xd4]
- vaddpd %xmm4, %xmm6, %xmm2
-
-// CHECK: vsubpd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x5c,0xd4]
- vsubpd %xmm4, %xmm6, %xmm2
-
-// CHECK: vmulpd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x59,0xd4]
- vmulpd %xmm4, %xmm6, %xmm2
-
-// CHECK: vdivpd %xmm4, %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0x5e,0xd4]
- vdivpd %xmm4, %xmm6, %xmm2
-
-// CHECK: vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vaddps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vsubps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vmulps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vdivps 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x58,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vaddpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5c,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vsubpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x59,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vmulpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5e,0xac,0xcb,0xef,0xbe,0xad,0xde]
- vdivpd 3735928559(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: vmaxss %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5f,0xf2]
- vmaxss %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxsd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5f,0xf2]
- vmaxsd %xmm2, %xmm4, %xmm6
-
-// CHECK: vminss %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5d,0xf2]
- vminss %xmm2, %xmm4, %xmm6
-
-// CHECK: vminsd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5d,0xf2]
- vminsd %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x5f,0x6c,0xcb,0xfc]
- vmaxss -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x5f,0x6c,0xcb,0xfc]
- vmaxsd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminss -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x5d,0x6c,0xcb,0xfc]
- vminss -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x5d,0x6c,0xcb,0xfc]
- vminsd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmaxps %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x5f,0xf2]
- vmaxps %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxpd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x5f,0xf2]
- vmaxpd %xmm2, %xmm4, %xmm6
-
-// CHECK: vminps %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x5d,0xf2]
- vminps %xmm2, %xmm4, %xmm6
-
-// CHECK: vminpd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x5d,0xf2]
- vminpd %xmm2, %xmm4, %xmm6
-
-// CHECK: vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5f,0x6c,0xcb,0xfc]
- vmaxps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5f,0x6c,0xcb,0xfc]
- vmaxpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x5d,0x6c,0xcb,0xfc]
- vminps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x5d,0x6c,0xcb,0xfc]
- vminpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandps %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x54,0xf2]
- vandps %xmm2, %xmm4, %xmm6
-
-// CHECK: vandpd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x54,0xf2]
- vandpd %xmm2, %xmm4, %xmm6
-
-// CHECK: vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x54,0x6c,0xcb,0xfc]
- vandps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x54,0x6c,0xcb,0xfc]
- vandpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vorps %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x56,0xf2]
- vorps %xmm2, %xmm4, %xmm6
-
-// CHECK: vorpd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x56,0xf2]
- vorpd %xmm2, %xmm4, %xmm6
-
-// CHECK: vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x56,0x6c,0xcb,0xfc]
- vorps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x56,0x6c,0xcb,0xfc]
- vorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vxorps %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x57,0xf2]
- vxorps %xmm2, %xmm4, %xmm6
-
-// CHECK: vxorpd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x57,0xf2]
- vxorpd %xmm2, %xmm4, %xmm6
-
-// CHECK: vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x57,0x6c,0xcb,0xfc]
- vxorps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x57,0x6c,0xcb,0xfc]
- vxorpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandnps %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd8,0x55,0xf2]
- vandnps %xmm2, %xmm4, %xmm6
-
-// CHECK: vandnpd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xd9,0x55,0xf2]
- vandnpd %xmm2, %xmm4, %xmm6
-
-// CHECK: vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x55,0x6c,0xcb,0xfc]
- vandnps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x55,0x6c,0xcb,0xfc]
- vandnpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vmovss -4(%ebx,%ecx,8), %xmm5
-// CHECK: encoding: [0xc5,0xfa,0x10,0x6c,0xcb,0xfc]
- vmovss -4(%ebx,%ecx,8), %xmm5
-
-// CHECK: vmovss %xmm4, %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xea,0x10,0xec]
- vmovss %xmm4, %xmm2, %xmm5
-
-// CHECK: vmovsd -4(%ebx,%ecx,8), %xmm5
-// CHECK: encoding: [0xc5,0xfb,0x10,0x6c,0xcb,0xfc]
- vmovsd -4(%ebx,%ecx,8), %xmm5
-
-// CHECK: vmovsd %xmm4, %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xeb,0x10,0xec]
- vmovsd %xmm4, %xmm2, %xmm5
-
-// CHECK: vunpckhps %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe8,0x15,0xe1]
- vunpckhps %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpckhpd %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe9,0x15,0xe1]
- vunpckhpd %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpcklps %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe8,0x14,0xe1]
- vunpcklps %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpcklpd %xmm1, %xmm2, %xmm4
-// CHECK: encoding: [0xc5,0xe9,0x14,0xe1]
- vunpcklpd %xmm1, %xmm2, %xmm4
-
-// CHECK: vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x15,0x6c,0xcb,0xfc]
- vunpckhps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x15,0x6c,0xcb,0xfc]
- vunpckhpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe8,0x14,0x6c,0xcb,0xfc]
- vunpcklps -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0x14,0x6c,0xcb,0xfc]
- vunpcklpd -4(%ebx,%ecx,8), %xmm2, %xmm5
-
-// CHECK: vcmpps $0, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x00]
- vcmpps $0, %xmm0, %xmm6, %xmm1
-
-// CHECK: vcmpps $0, (%eax), %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc8,0xc2,0x08,0x00]
- vcmpps $0, (%eax), %xmm6, %xmm1
-
-// CHECK: vcmpps $7, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc8,0xc2,0xc8,0x07]
- vcmpps $7, %xmm0, %xmm6, %xmm1
-
-// CHECK: vcmppd $0, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x00]
- vcmppd $0, %xmm0, %xmm6, %xmm1
-
-// CHECK: vcmppd $0, (%eax), %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc9,0xc2,0x08,0x00]
- vcmppd $0, (%eax), %xmm6, %xmm1
-
-// CHECK: vcmppd $7, %xmm0, %xmm6, %xmm1
-// CHECK: encoding: [0xc5,0xc9,0xc2,0xc8,0x07]
- vcmppd $7, %xmm0, %xmm6, %xmm1
-
-// CHECK: vshufps $8, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc6,0xd9,0x08]
- vshufps $8, %xmm1, %xmm2, %xmm3
-
-// CHECK: vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc6,0x5c,0xcb,0xfc,0x08]
- vshufps $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vshufpd $8, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc6,0xd9,0x08]
- vshufpd $8, %xmm1, %xmm2, %xmm3
-
-// CHECK: vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc6,0x5c,0xcb,0xfc,0x08]
- vshufpd $8, -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x00]
- vcmpeqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x02]
- vcmpleps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x01]
- vcmpltps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x04]
- vcmpneqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x06]
- vcmpnleps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x05]
- vcmpnltps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x07]
- vcmpordps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x03]
- vcmpunordps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x00]
- vcmpeqps -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x02]
- vcmpleps -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x01]
- vcmpltps -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x04]
- vcmpneqps -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x06]
- vcmpnleps -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x05]
- vcmpnltps -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordps -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0x5c,0xcb,0xfc,0x03]
- vcmpunordps -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x00]
- vcmpeqpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x02]
- vcmplepd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x01]
- vcmpltpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x04]
- vcmpneqpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x06]
- vcmpnlepd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x05]
- vcmpnltpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x07]
- vcmpordpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0xd9,0x03]
- vcmpunordpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x00]
- vcmpeqpd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x02]
- vcmplepd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x01]
- vcmpltpd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x04]
- vcmpneqpd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x06]
- vcmpnlepd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x05]
- vcmpnltpd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordpd -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc2,0x5c,0xcb,0xfc,0x03]
- vcmpunordpd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vmovmskps %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
- vmovmskps %xmm2, %eax
-
-// CHECK: vmovmskpd %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
- vmovmskpd %xmm2, %eax
-
-// CHECK: vcmpss $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x00]
- vcmpeqss %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x02]
- vcmpless %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x01]
- vcmpltss %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x04]
- vcmpneqss %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x06]
- vcmpnless %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x05]
- vcmpnltss %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x07]
- vcmpordss %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0xd9,0x03]
- vcmpunordss %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpss $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x00]
- vcmpeqss -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x02]
- vcmpless -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x01]
- vcmpltss -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x04]
- vcmpneqss -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x06]
- vcmpnless -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x05]
- vcmpnltss -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpss $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordss -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmpss $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0xc2,0x5c,0xcb,0xfc,0x03]
- vcmpunordss -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd $0, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x00]
- vcmpeqsd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $2, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x02]
- vcmplesd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $1, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x01]
- vcmpltsd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $4, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x04]
- vcmpneqsd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $6, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x06]
- vcmpnlesd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $5, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x05]
- vcmpnltsd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x07]
- vcmpordsd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $3, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0xd9,0x03]
- vcmpunordsd %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpsd $0, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x00]
- vcmpeqsd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd $2, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x02]
- vcmplesd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd $1, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x01]
- vcmpltsd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd $4, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x04]
- vcmpneqsd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd $6, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x06]
- vcmpnlesd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd $5, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x05]
- vcmpnltsd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vcmpsd $7, -4(%ebx,%ecx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordsd -4(%ebx,%ecx,8), %xmm6, %xmm2
-
-// CHECK: vcmpsd $3, -4(%ebx,%ecx,8), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xc2,0x5c,0xcb,0xfc,0x03]
- vcmpunordsd -4(%ebx,%ecx,8), %xmm2, %xmm3
-
-// CHECK: vucomiss %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2e,0xd1]
- vucomiss %xmm1, %xmm2
-
-// CHECK: vucomiss (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2e,0x10]
- vucomiss (%eax), %xmm2
-
-// CHECK: vcomiss %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2f,0xd1]
- vcomiss %xmm1, %xmm2
-
-// CHECK: vcomiss (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x2f,0x10]
- vcomiss (%eax), %xmm2
-
-// CHECK: vucomisd %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2e,0xd1]
- vucomisd %xmm1, %xmm2
-
-// CHECK: vucomisd (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2e,0x10]
- vucomisd (%eax), %xmm2
-
-// CHECK: vcomisd %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2f,0xd1]
- vcomisd %xmm1, %xmm2
-
-// CHECK: vcomisd (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x2f,0x10]
- vcomisd (%eax), %xmm2
-
-// CHECK: vcvttss2si %xmm1, %eax
-// CHECK: encoding: [0xc5,0xfa,0x2c,0xc1]
- vcvttss2si %xmm1, %eax
-
-// CHECK: vcvttss2si (%ecx), %eax
-// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
- vcvttss2si (%ecx), %eax
-
-// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
- vcvtsi2ss (%eax), %xmm1, %xmm2
-
-// CHECK: vcvtsi2ss (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf2,0x2a,0x10]
- vcvtsi2ss (%eax), %xmm1, %xmm2
-
-// CHECK: vcvttsd2si %xmm1, %eax
-// CHECK: encoding: [0xc5,0xfb,0x2c,0xc1]
- vcvttsd2si %xmm1, %eax
-
-// CHECK: vcvttsd2si (%ecx), %eax
-// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
- vcvttsd2si (%ecx), %eax
-
-// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
- vcvtsi2sd (%eax), %xmm1, %xmm2
-
-// CHECK: vcvtsi2sd (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf3,0x2a,0x10]
- vcvtsi2sd (%eax), %xmm1, %xmm2
-
-// CHECK: vmovaps (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x28,0x10]
- vmovaps (%eax), %xmm2
-
-// CHECK: vmovaps %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x28,0xd1]
- vmovaps %xmm1, %xmm2
-
-// CHECK: vmovaps %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x29,0x08]
- vmovaps %xmm1, (%eax)
-
-// CHECK: vmovapd (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x28,0x10]
- vmovapd (%eax), %xmm2
-
-// CHECK: vmovapd %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x28,0xd1]
- vmovapd %xmm1, %xmm2
-
-// CHECK: vmovapd %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x29,0x08]
- vmovapd %xmm1, (%eax)
-
-// CHECK: vmovups (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x10,0x10]
- vmovups (%eax), %xmm2
-
-// CHECK: vmovups %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x10,0xd1]
- vmovups %xmm1, %xmm2
-
-// CHECK: vmovups %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x11,0x08]
- vmovups %xmm1, (%eax)
-
-// CHECK: vmovupd (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x10,0x10]
- vmovupd (%eax), %xmm2
-
-// CHECK: vmovupd %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x10,0xd1]
- vmovupd %xmm1, %xmm2
-
-// CHECK: vmovupd %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x11,0x08]
- vmovupd %xmm1, (%eax)
-
-// CHECK: vmovlps %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x13,0x08]
- vmovlps %xmm1, (%eax)
-
-// CHECK: vmovlps (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x12,0x18]
- vmovlps (%eax), %xmm2, %xmm3
-
-// CHECK: vmovlpd %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x13,0x08]
- vmovlpd %xmm1, (%eax)
-
-// CHECK: vmovlpd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x12,0x18]
- vmovlpd (%eax), %xmm2, %xmm3
-
-// CHECK: vmovhps %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x17,0x08]
- vmovhps %xmm1, (%eax)
-
-// CHECK: vmovhps (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x16,0x18]
- vmovhps (%eax), %xmm2, %xmm3
-
-// CHECK: vmovhpd %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x17,0x08]
- vmovhpd %xmm1, (%eax)
-
-// CHECK: vmovhpd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x16,0x18]
- vmovhpd (%eax), %xmm2, %xmm3
-
-// CHECK: vmovlhps %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x16,0xd9]
- vmovlhps %xmm1, %xmm2, %xmm3
-
-// CHECK: vmovhlps %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0x12,0xd9]
- vmovhlps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcvtss2sil %xmm1, %eax
-// CHECK: encoding: [0xc5,0xfa,0x2d,0xc1]
- vcvtss2si %xmm1, %eax
-
-// CHECK: vcvtss2sil (%eax), %ebx
-// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
- vcvtss2si (%eax), %ebx
-
-// CHECK: vcvtdq2ps %xmm5, %xmm6
-// CHECK: encoding: [0xc5,0xf8,0x5b,0xf5]
- vcvtdq2ps %xmm5, %xmm6
-
-// CHECK: vcvtdq2ps (%eax), %xmm6
-// CHECK: encoding: [0xc5,0xf8,0x5b,0x30]
- vcvtdq2ps (%eax), %xmm6
-
-// CHECK: vcvtsd2ss %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5a,0xf2]
- vcvtsd2ss %xmm2, %xmm4, %xmm6
-
-// CHECK: vcvtsd2ss (%eax), %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xdb,0x5a,0x30]
- vcvtsd2ss (%eax), %xmm4, %xmm6
-
-// CHECK: vcvtps2dq %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x5b,0xda]
- vcvtps2dq %xmm2, %xmm3
-
-// CHECK: vcvtps2dq (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x5b,0x18]
- vcvtps2dq (%eax), %xmm3
-
-// CHECK: vcvtss2sd %xmm2, %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5a,0xf2]
- vcvtss2sd %xmm2, %xmm4, %xmm6
-
-// CHECK: vcvtss2sd (%eax), %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xda,0x5a,0x30]
- vcvtss2sd (%eax), %xmm4, %xmm6
-
-// CHECK: vcvtdq2ps %xmm4, %xmm6
-// CHECK: encoding: [0xc5,0xf8,0x5b,0xf4]
- vcvtdq2ps %xmm4, %xmm6
-
-// CHECK: vcvtdq2ps (%ecx), %xmm4
-// CHECK: encoding: [0xc5,0xf8,0x5b,0x21]
- vcvtdq2ps (%ecx), %xmm4
-
-// CHECK: vcvttps2dq %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x5b,0xda]
- vcvttps2dq %xmm2, %xmm3
-
-// CHECK: vcvttps2dq (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x5b,0x18]
- vcvttps2dq (%eax), %xmm3
-
-// CHECK: vcvtps2pd %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf8,0x5a,0xda]
- vcvtps2pd %xmm2, %xmm3
-
-// CHECK: vcvtps2pd (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xf8,0x5a,0x18]
- vcvtps2pd (%eax), %xmm3
-
-// CHECK: vcvtpd2ps %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x5a,0xda]
- vcvtpd2ps %xmm2, %xmm3
-
-// CHECK: vsqrtpd %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x51,0xd1]
- vsqrtpd %xmm1, %xmm2
-
-// CHECK: vsqrtpd (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf9,0x51,0x10]
- vsqrtpd (%eax), %xmm2
-
-// CHECK: vsqrtps %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x51,0xd1]
- vsqrtps %xmm1, %xmm2
-
-// CHECK: vsqrtps (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x51,0x10]
- vsqrtps (%eax), %xmm2
-
-// CHECK: vsqrtsd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x51,0xd9]
- vsqrtsd %xmm1, %xmm2, %xmm3
-
-// CHECK: vsqrtsd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x51,0x18]
- vsqrtsd (%eax), %xmm2, %xmm3
-
-// CHECK: vsqrtss %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x51,0xd9]
- vsqrtss %xmm1, %xmm2, %xmm3
-
-// CHECK: vsqrtss (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x51,0x18]
- vsqrtss (%eax), %xmm2, %xmm3
-
-// CHECK: vrsqrtps %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x52,0xd1]
- vrsqrtps %xmm1, %xmm2
-
-// CHECK: vrsqrtps (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x52,0x10]
- vrsqrtps (%eax), %xmm2
-
-// CHECK: vrsqrtss %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x52,0xd9]
- vrsqrtss %xmm1, %xmm2, %xmm3
-
-// CHECK: vrsqrtss (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x52,0x18]
- vrsqrtss (%eax), %xmm2, %xmm3
-
-// CHECK: vrcpps %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x53,0xd1]
- vrcpps %xmm1, %xmm2
-
-// CHECK: vrcpps (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xf8,0x53,0x10]
- vrcpps (%eax), %xmm2
-
-// CHECK: vrcpss %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x53,0xd9]
- vrcpss %xmm1, %xmm2, %xmm3
-
-// CHECK: vrcpss (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xea,0x53,0x18]
- vrcpss (%eax), %xmm2, %xmm3
-
-// CHECK: vmovntdq %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0xe7,0x08]
- vmovntdq %xmm1, (%eax)
-
-// CHECK: vmovntpd %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x2b,0x08]
- vmovntpd %xmm1, (%eax)
-
-// CHECK: vmovntps %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf8,0x2b,0x08]
- vmovntps %xmm1, (%eax)
-
-// CHECK: vldmxcsr (%eax)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x10]
- vldmxcsr (%eax)
-
-// CHECK: vstmxcsr (%eax)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x18]
- vstmxcsr (%eax)
-
-// CHECK: vldmxcsr 3735928559
-// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xef,0xbe,0xad,0xde]
- vldmxcsr 0xdeadbeef
-
-// CHECK: vstmxcsr 3735928559
-// CHECK: encoding: [0xc5,0xf8,0xae,0x1d,0xef,0xbe,0xad,0xde]
- vstmxcsr 0xdeadbeef
-
-// CHECK: vpsubb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf8,0xd9]
- vpsubb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf8,0x18]
- vpsubb (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf9,0xd9]
- vpsubw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf9,0x18]
- vpsubw (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfa,0xd9]
- vpsubd %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfa,0x18]
- vpsubd (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfb,0xd9]
- vpsubq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfb,0x18]
- vpsubq (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubsb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe8,0xd9]
- vpsubsb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubsb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe8,0x18]
- vpsubsb (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe9,0xd9]
- vpsubsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe9,0x18]
- vpsubsw (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubusb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd8,0xd9]
- vpsubusb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubusb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd8,0x18]
- vpsubusb (%eax), %xmm2, %xmm3
-
-// CHECK: vpsubusw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd9,0xd9]
- vpsubusw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsubusw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd9,0x18]
- vpsubusw (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfc,0xd9]
- vpaddb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfc,0x18]
- vpaddb (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfd,0xd9]
- vpaddw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfd,0x18]
- vpaddw (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfe,0xd9]
- vpaddd %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xfe,0x18]
- vpaddd (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd4,0xd9]
- vpaddq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd4,0x18]
- vpaddq (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddsb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xec,0xd9]
- vpaddsb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddsb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xec,0x18]
- vpaddsb (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xed,0xd9]
- vpaddsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xed,0x18]
- vpaddsw (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddusb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdc,0xd9]
- vpaddusb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddusb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdc,0x18]
- vpaddusb (%eax), %xmm2, %xmm3
-
-// CHECK: vpaddusw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdd,0xd9]
- vpaddusw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpaddusw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdd,0x18]
- vpaddusw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulhuw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe4,0xd9]
- vpmulhuw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmulhuw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe4,0x18]
- vpmulhuw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulhw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe5,0xd9]
- vpmulhw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmulhw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe5,0x18]
- vpmulhw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmullw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd5,0xd9]
- vpmullw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmullw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd5,0x18]
- vpmullw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmuludq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf4,0xd9]
- vpmuludq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmuludq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf4,0x18]
- vpmuludq (%eax), %xmm2, %xmm3
-
-// CHECK: vpavgb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe0,0xd9]
- vpavgb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpavgb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe0,0x18]
- vpavgb (%eax), %xmm2, %xmm3
-
-// CHECK: vpavgw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe3,0xd9]
- vpavgw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpavgw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe3,0x18]
- vpavgw (%eax), %xmm2, %xmm3
-
-// CHECK: vpminsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xea,0xd9]
- vpminsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpminsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xea,0x18]
- vpminsw (%eax), %xmm2, %xmm3
-
-// CHECK: vpminub %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xda,0xd9]
- vpminub %xmm1, %xmm2, %xmm3
-
-// CHECK: vpminub (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xda,0x18]
- vpminub (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xee,0xd9]
- vpmaxsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmaxsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xee,0x18]
- vpmaxsw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxub %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xde,0xd9]
- vpmaxub %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmaxub (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xde,0x18]
- vpmaxub (%eax), %xmm2, %xmm3
-
-// CHECK: vpsadbw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf6,0xd9]
- vpsadbw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsadbw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf6,0x18]
- vpsadbw (%eax), %xmm2, %xmm3
-
-// CHECK: vpsllw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf1,0xd9]
- vpsllw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsllw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf1,0x18]
- vpsllw (%eax), %xmm2, %xmm3
-
-// CHECK: vpslld %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf2,0xd9]
- vpslld %xmm1, %xmm2, %xmm3
-
-// CHECK: vpslld (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf2,0x18]
- vpslld (%eax), %xmm2, %xmm3
-
-// CHECK: vpsllq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf3,0xd9]
- vpsllq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsllq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xf3,0x18]
- vpsllq (%eax), %xmm2, %xmm3
-
-// CHECK: vpsraw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe1,0xd9]
- vpsraw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsraw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe1,0x18]
- vpsraw (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrad %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe2,0xd9]
- vpsrad %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrad (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xe2,0x18]
- vpsrad (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrlw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd1,0xd9]
- vpsrlw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrlw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd1,0x18]
- vpsrlw (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrld %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd2,0xd9]
- vpsrld %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrld (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd2,0x18]
- vpsrld (%eax), %xmm2, %xmm3
-
-// CHECK: vpsrlq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd3,0xd9]
- vpsrlq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsrlq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd3,0x18]
- vpsrlq (%eax), %xmm2, %xmm3
-
-// CHECK: vpslld $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
- vpslld $10, %xmm2, %xmm3
-
-// CHECK: vpslldq $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xfa,0x0a]
- vpslldq $10, %xmm2, %xmm3
-
-// CHECK: vpsllq $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xf2,0x0a]
- vpsllq $10, %xmm2, %xmm3
-
-// CHECK: vpsllw $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x71,0xf2,0x0a]
- vpsllw $10, %xmm2, %xmm3
-
-// CHECK: vpsrad $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xe2,0x0a]
- vpsrad $10, %xmm2, %xmm3
-
-// CHECK: vpsraw $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x71,0xe2,0x0a]
- vpsraw $10, %xmm2, %xmm3
-
-// CHECK: vpsrld $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xd2,0x0a]
- vpsrld $10, %xmm2, %xmm3
-
-// CHECK: vpsrldq $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xda,0x0a]
- vpsrldq $10, %xmm2, %xmm3
-
-// CHECK: vpsrlq $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x73,0xd2,0x0a]
- vpsrlq $10, %xmm2, %xmm3
-
-// CHECK: vpsrlw $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x71,0xd2,0x0a]
- vpsrlw $10, %xmm2, %xmm3
-
-// CHECK: vpslld $10, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe1,0x72,0xf2,0x0a]
- vpslld $10, %xmm2, %xmm3
-
-// CHECK: vpand %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdb,0xd9]
- vpand %xmm1, %xmm2, %xmm3
-
-// CHECK: vpand (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdb,0x18]
- vpand (%eax), %xmm2, %xmm3
-
-// CHECK: vpor %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xeb,0xd9]
- vpor %xmm1, %xmm2, %xmm3
-
-// CHECK: vpor (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xeb,0x18]
- vpor (%eax), %xmm2, %xmm3
-
-// CHECK: vpxor %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xef,0xd9]
- vpxor %xmm1, %xmm2, %xmm3
-
-// CHECK: vpxor (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xef,0x18]
- vpxor (%eax), %xmm2, %xmm3
-
-// CHECK: vpandn %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdf,0xd9]
- vpandn %xmm1, %xmm2, %xmm3
-
-// CHECK: vpandn (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xdf,0x18]
- vpandn (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x74,0xd9]
- vpcmpeqb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpeqb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x74,0x18]
- vpcmpeqb (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x75,0xd9]
- vpcmpeqw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpeqw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x75,0x18]
- vpcmpeqw (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x76,0xd9]
- vpcmpeqd %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpeqd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x76,0x18]
- vpcmpeqd (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpgtb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x64,0xd9]
- vpcmpgtb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpgtb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x64,0x18]
- vpcmpgtb (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpgtw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x65,0xd9]
- vpcmpgtw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpgtw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x65,0x18]
- vpcmpgtw (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpgtd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x66,0xd9]
- vpcmpgtd %xmm1, %xmm2, %xmm3
-
-// CHECK: vpcmpgtd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x66,0x18]
- vpcmpgtd (%eax), %xmm2, %xmm3
-
-// CHECK: vpacksswb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x63,0xd9]
- vpacksswb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpacksswb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x63,0x18]
- vpacksswb (%eax), %xmm2, %xmm3
-
-// CHECK: vpackssdw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6b,0xd9]
- vpackssdw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpackssdw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6b,0x18]
- vpackssdw (%eax), %xmm2, %xmm3
-
-// CHECK: vpackuswb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x67,0xd9]
- vpackuswb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpackuswb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x67,0x18]
- vpackuswb (%eax), %xmm2, %xmm3
-
-// CHECK: vpshufd $4, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x70,0xda,0x04]
- vpshufd $4, %xmm2, %xmm3
-
-// CHECK: vpshufd $4, (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xf9,0x70,0x18,0x04]
- vpshufd $4, (%eax), %xmm3
-
-// CHECK: vpshufhw $4, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x70,0xda,0x04]
- vpshufhw $4, %xmm2, %xmm3
-
-// CHECK: vpshufhw $4, (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xfa,0x70,0x18,0x04]
- vpshufhw $4, (%eax), %xmm3
-
-// CHECK: vpshuflw $4, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xfb,0x70,0xda,0x04]
- vpshuflw $4, %xmm2, %xmm3
-
-// CHECK: vpshuflw $4, (%eax), %xmm3
-// CHECK: encoding: [0xc5,0xfb,0x70,0x18,0x04]
- vpshuflw $4, (%eax), %xmm3
-
-// CHECK: vpunpcklbw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x60,0xd9]
- vpunpcklbw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpcklbw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x60,0x18]
- vpunpcklbw (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpcklwd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x61,0xd9]
- vpunpcklwd %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpcklwd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x61,0x18]
- vpunpcklwd (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckldq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x62,0xd9]
- vpunpckldq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckldq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x62,0x18]
- vpunpckldq (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpcklqdq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6c,0xd9]
- vpunpcklqdq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpcklqdq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6c,0x18]
- vpunpcklqdq (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhbw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x68,0xd9]
- vpunpckhbw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhbw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x68,0x18]
- vpunpckhbw (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhwd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x69,0xd9]
- vpunpckhwd %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhwd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x69,0x18]
- vpunpckhwd (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhdq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6a,0xd9]
- vpunpckhdq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhdq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6a,0x18]
- vpunpckhdq (%eax), %xmm2, %xmm3
-
-// CHECK: vpunpckhqdq %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6d,0xd9]
- vpunpckhqdq %xmm1, %xmm2, %xmm3
-
-// CHECK: vpunpckhqdq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x6d,0x18]
- vpunpckhqdq (%eax), %xmm2, %xmm3
-
-// CHECK: vpinsrw $7, %eax, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc4,0xd8,0x07]
- vpinsrw $7, %eax, %xmm2, %xmm3
-
-// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xc4,0x18,0x07]
- vpinsrw $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vpextrw $7, %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
- vpextrw $7, %xmm2, %eax
-
-// CHECK: vpmovmskb %xmm1, %eax
-// CHECK: encoding: [0xc5,0xf9,0xd7,0xc1]
- vpmovmskb %xmm1, %eax
-
-// CHECK: vmaskmovdqu %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf9,0xf7,0xd1]
- vmaskmovdqu %xmm1, %xmm2
-
-// CHECK: vmovd %xmm1, %eax
-// CHECK: encoding: [0xc5,0xf9,0x7e,0xc8]
- vmovd %xmm1, %eax
-
-// CHECK: vmovd %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0x7e,0x08]
- vmovd %xmm1, (%eax)
-
-// CHECK: vmovd %eax, %xmm1
-// CHECK: encoding: [0xc5,0xf9,0x6e,0xc8]
- vmovd %eax, %xmm1
-
-// CHECK: vmovd (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xf9,0x6e,0x08]
- vmovd (%eax), %xmm1
-
-// CHECK: vmovq %xmm1, (%eax)
-// CHECK: encoding: [0xc5,0xf9,0xd6,0x08]
- vmovq %xmm1, (%eax)
-
-// CHECK: vmovq %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x7e,0xd1]
- vmovq %xmm1, %xmm2
-
-// CHECK: vmovq (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfa,0x7e,0x08]
- vmovq (%eax), %xmm1
-
-// CHECK: vcvtpd2dq %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfb,0xe6,0xd1]
- vcvtpd2dq %xmm1, %xmm2
-
-// CHECK: vcvtdq2pd %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0xe6,0xd1]
- vcvtdq2pd %xmm1, %xmm2
-
-// CHECK: vcvtdq2pd (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfa,0xe6,0x10]
- vcvtdq2pd (%eax), %xmm2
-
-// CHECK: vmovshdup %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x16,0xd1]
- vmovshdup %xmm1, %xmm2
-
-// CHECK: vmovshdup (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x16,0x10]
- vmovshdup (%eax), %xmm2
-
-// CHECK: vmovsldup %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x12,0xd1]
- vmovsldup %xmm1, %xmm2
-
-// CHECK: vmovsldup (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfa,0x12,0x10]
- vmovsldup (%eax), %xmm2
-
-// CHECK: vmovddup %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xfb,0x12,0xd1]
- vmovddup %xmm1, %xmm2
-
-// CHECK: vmovddup (%eax), %xmm2
-// CHECK: encoding: [0xc5,0xfb,0x12,0x10]
- vmovddup (%eax), %xmm2
-
-// CHECK: vaddsubps %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0xd0,0xd9]
- vaddsubps %xmm1, %xmm2, %xmm3
-
-// CHECK: vaddsubps (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf3,0xd0,0x10]
- vaddsubps (%eax), %xmm1, %xmm2
-
-// CHECK: vaddsubpd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0xd0,0xd9]
- vaddsubpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vaddsubpd (%eax), %xmm1, %xmm2
-// CHECK: encoding: [0xc5,0xf1,0xd0,0x10]
- vaddsubpd (%eax), %xmm1, %xmm2
-
-// CHECK: vhaddps %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7c,0xd9]
- vhaddps %xmm1, %xmm2, %xmm3
-
-// CHECK: vhaddps (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7c,0x18]
- vhaddps (%eax), %xmm2, %xmm3
-
-// CHECK: vhaddpd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7c,0xd9]
- vhaddpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vhaddpd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7c,0x18]
- vhaddpd (%eax), %xmm2, %xmm3
-
-// CHECK: vhsubps %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7d,0xd9]
- vhsubps %xmm1, %xmm2, %xmm3
-
-// CHECK: vhsubps (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xeb,0x7d,0x18]
- vhsubps (%eax), %xmm2, %xmm3
-
-// CHECK: vhsubpd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7d,0xd9]
- vhsubpd %xmm1, %xmm2, %xmm3
-
-// CHECK: vhsubpd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe9,0x7d,0x18]
- vhsubpd (%eax), %xmm2, %xmm3
-
-// CHECK: vpabsb %xmm1, %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0xd1]
- vpabsb %xmm1, %xmm2
-
-// CHECK: vpabsb (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1c,0x10]
- vpabsb (%eax), %xmm2
-
-// CHECK: vpabsw %xmm1, %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0xd1]
- vpabsw %xmm1, %xmm2
-
-// CHECK: vpabsw (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1d,0x10]
- vpabsw (%eax), %xmm2
-
-// CHECK: vpabsd %xmm1, %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0xd1]
- vpabsd %xmm1, %xmm2
-
-// CHECK: vpabsd (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x1e,0x10]
- vpabsd (%eax), %xmm2
-
-// CHECK: vphaddw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0xd9]
- vphaddw %xmm1, %xmm2, %xmm3
-
-// CHECK: vphaddw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x01,0x18]
- vphaddw (%eax), %xmm2, %xmm3
-
-// CHECK: vphaddd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0xd9]
- vphaddd %xmm1, %xmm2, %xmm3
-
-// CHECK: vphaddd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x02,0x18]
- vphaddd (%eax), %xmm2, %xmm3
-
-// CHECK: vphaddsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0xd9]
- vphaddsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vphaddsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x03,0x18]
- vphaddsw (%eax), %xmm2, %xmm3
-
-// CHECK: vphsubw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0xd9]
- vphsubw %xmm1, %xmm2, %xmm3
-
-// CHECK: vphsubw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x05,0x18]
- vphsubw (%eax), %xmm2, %xmm3
-
-// CHECK: vphsubd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0xd9]
- vphsubd %xmm1, %xmm2, %xmm3
-
-// CHECK: vphsubd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x06,0x18]
- vphsubd (%eax), %xmm2, %xmm3
-
-// CHECK: vphsubsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0xd9]
- vphsubsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vphsubsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x07,0x18]
- vphsubsw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaddubsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0xd9]
- vpmaddubsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmaddubsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x04,0x18]
- vpmaddubsw (%eax), %xmm2, %xmm3
-
-// CHECK: vpshufb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0xd9]
- vpshufb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpshufb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x00,0x18]
- vpshufb (%eax), %xmm2, %xmm3
-
-// CHECK: vpsignb %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0xd9]
- vpsignb %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsignb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x08,0x18]
- vpsignb (%eax), %xmm2, %xmm3
-
-// CHECK: vpsignw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0xd9]
- vpsignw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsignw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x09,0x18]
- vpsignw (%eax), %xmm2, %xmm3
-
-// CHECK: vpsignd %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0xd9]
- vpsignd %xmm1, %xmm2, %xmm3
-
-// CHECK: vpsignd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0a,0x18]
- vpsignd (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulhrsw %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0xd9]
- vpmulhrsw %xmm1, %xmm2, %xmm3
-
-// CHECK: vpmulhrsw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x0b,0x18]
- vpmulhrsw (%eax), %xmm2, %xmm3
-
-// CHECK: vpalignr $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0xd9,0x07]
- vpalignr $7, %xmm1, %xmm2, %xmm3
-
-// CHECK: vpalignr $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0f,0x18,0x07]
- vpalignr $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vroundsd $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0xd9,0x07]
- vroundsd $7, %xmm1, %xmm2, %xmm3
-
-// CHECK: vroundsd $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0b,0x18,0x07]
- vroundsd $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vroundss $7, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0xd9,0x07]
- vroundss $7, %xmm1, %xmm2, %xmm3
-
-// CHECK: vroundss $7, (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x69,0x0a,0x18,0x07]
- vroundss $7, (%eax), %xmm2, %xmm3
-
-// CHECK: vroundpd $7, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0xda,0x07]
- vroundpd $7, %xmm2, %xmm3
-
-// CHECK: vroundpd $7, (%eax), %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x09,0x18,0x07]
- vroundpd $7, (%eax), %xmm3
-
-// CHECK: vroundps $7, %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0xda,0x07]
- vroundps $7, %xmm2, %xmm3
-
-// CHECK: vroundps $7, (%eax), %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x79,0x08,0x18,0x07]
- vroundps $7, (%eax), %xmm3
-
-// CHECK: vphminposuw %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0xda]
- vphminposuw %xmm2, %xmm3
-
-// CHECK: vphminposuw (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10]
- vphminposuw (%eax), %xmm2
-
-// CHECK: vpackusdw %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca]
- vpackusdw %xmm2, %xmm3, %xmm1
-
-// CHECK: vpackusdw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18]
- vpackusdw (%eax), %xmm2, %xmm3
-
-// CHECK: vpcmpeqq %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca]
- vpcmpeqq %xmm2, %xmm3, %xmm1
-
-// CHECK: vpcmpeqq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18]
- vpcmpeqq (%eax), %xmm2, %xmm3
-
-// CHECK: vpminsb %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca]
- vpminsb %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminsb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18]
- vpminsb (%eax), %xmm2, %xmm3
-
-// CHECK: vpminsd %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca]
- vpminsd %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminsd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18]
- vpminsd (%eax), %xmm2, %xmm3
-
-// CHECK: vpminud %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca]
- vpminud %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminud (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18]
- vpminud (%eax), %xmm2, %xmm3
-
-// CHECK: vpminuw %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca]
- vpminuw %xmm2, %xmm3, %xmm1
-
-// CHECK: vpminuw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18]
- vpminuw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxsb %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca]
- vpmaxsb %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxsb (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18]
- vpmaxsb (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxsd %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca]
- vpmaxsd %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxsd (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18]
- vpmaxsd (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxud %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca]
- vpmaxud %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxud (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18]
- vpmaxud (%eax), %xmm2, %xmm3
-
-// CHECK: vpmaxuw %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca]
- vpmaxuw %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmaxuw (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18]
- vpmaxuw (%eax), %xmm2, %xmm3
-
-// CHECK: vpmuldq %xmm2, %xmm3, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca]
- vpmuldq %xmm2, %xmm3, %xmm1
-
-// CHECK: vpmuldq (%eax), %xmm2, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18]
- vpmuldq (%eax), %xmm2, %xmm3
-
-// CHECK: vpmulld %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0xca]
- vpmulld %xmm2, %xmm5, %xmm1
-
-// CHECK: vpmulld (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0x40,0x18]
- vpmulld (%eax), %xmm5, %xmm3
-
-// CHECK: vblendps $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0xca,0x03]
- vblendps $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vblendps $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0c,0x08,0x03]
- vblendps $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vblendpd $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0xca,0x03]
- vblendpd $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vblendpd $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0d,0x08,0x03]
- vblendpd $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vpblendw $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0xca,0x03]
- vpblendw $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vpblendw $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x0e,0x08,0x03]
- vpblendw $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vmpsadbw $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0xca,0x03]
- vmpsadbw $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vmpsadbw $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x42,0x08,0x03]
- vmpsadbw $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vdpps $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0xca,0x03]
- vdpps $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vdpps $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x40,0x08,0x03]
- vdpps $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vdppd $3, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0xca,0x03]
- vdppd $3, %xmm2, %xmm5, %xmm1
-
-// CHECK: vdppd $3, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x41,0x08,0x03]
- vdppd $3, (%eax), %xmm5, %xmm1
-
-// CHECK: vblendvpd %xmm2, %xmm5, %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0xdd,0x20]
- vblendvpd %xmm2, %xmm5, %xmm1, %xmm3
-
-// CHECK: vblendvpd %xmm2, (%eax), %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4b,0x18,0x20]
- vblendvpd %xmm2, (%eax), %xmm1, %xmm3
-
-// CHECK: vblendvps %xmm2, %xmm5, %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0xdd,0x20]
- vblendvps %xmm2, %xmm5, %xmm1, %xmm3
-
-// CHECK: vblendvps %xmm2, (%eax), %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4a,0x18,0x20]
- vblendvps %xmm2, (%eax), %xmm1, %xmm3
-
-// CHECK: vpblendvb %xmm2, %xmm5, %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0xdd,0x20]
- vpblendvb %xmm2, %xmm5, %xmm1, %xmm3
-
-// CHECK: vpblendvb %xmm2, (%eax), %xmm1, %xmm3
-// CHECK: encoding: [0xc4,0xe3,0x71,0x4c,0x18,0x20]
- vpblendvb %xmm2, (%eax), %xmm1, %xmm3
-
-// CHECK: vpmovsxbw %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0xea]
- vpmovsxbw %xmm2, %xmm5
-
-// CHECK: vpmovsxbw (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x20,0x10]
- vpmovsxbw (%eax), %xmm2
-
-// CHECK: vpmovsxwd %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0xea]
- vpmovsxwd %xmm2, %xmm5
-
-// CHECK: vpmovsxwd (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x23,0x10]
- vpmovsxwd (%eax), %xmm2
-
-// CHECK: vpmovsxdq %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0xea]
- vpmovsxdq %xmm2, %xmm5
-
-// CHECK: vpmovsxdq (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x25,0x10]
- vpmovsxdq (%eax), %xmm2
-
-// CHECK: vpmovzxbw %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0xea]
- vpmovzxbw %xmm2, %xmm5
-
-// CHECK: vpmovzxbw (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x30,0x10]
- vpmovzxbw (%eax), %xmm2
-
-// CHECK: vpmovzxwd %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0xea]
- vpmovzxwd %xmm2, %xmm5
-
-// CHECK: vpmovzxwd (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x33,0x10]
- vpmovzxwd (%eax), %xmm2
-
-// CHECK: vpmovzxdq %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0xea]
- vpmovzxdq %xmm2, %xmm5
-
-// CHECK: vpmovzxdq (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x35,0x10]
- vpmovzxdq (%eax), %xmm2
-
-// CHECK: vpmovsxbq %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0xea]
- vpmovsxbq %xmm2, %xmm5
-
-// CHECK: vpmovsxbq (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x22,0x10]
- vpmovsxbq (%eax), %xmm2
-
-// CHECK: vpmovzxbq %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0xea]
- vpmovzxbq %xmm2, %xmm5
-
-// CHECK: vpmovzxbq (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x32,0x10]
- vpmovzxbq (%eax), %xmm2
-
-// CHECK: vpmovsxbd %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0xea]
- vpmovsxbd %xmm2, %xmm5
-
-// CHECK: vpmovsxbd (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x21,0x10]
- vpmovsxbd (%eax), %xmm2
-
-// CHECK: vpmovsxwq %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0xea]
- vpmovsxwq %xmm2, %xmm5
-
-// CHECK: vpmovsxwq (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x24,0x10]
- vpmovsxwq (%eax), %xmm2
-
-// CHECK: vpmovzxbd %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0xea]
- vpmovzxbd %xmm2, %xmm5
-
-// CHECK: vpmovzxbd (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x31,0x10]
- vpmovzxbd (%eax), %xmm2
-
-// CHECK: vpmovzxwq %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0xea]
- vpmovzxwq %xmm2, %xmm5
-
-// CHECK: vpmovzxwq (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x34,0x10]
- vpmovzxwq (%eax), %xmm2
-
-// CHECK: vpextrw $7, %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0xc5,0xc2,0x07]
- vpextrw $7, %xmm2, %eax
-
-// CHECK: vpextrw $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x15,0x10,0x07]
- vpextrw $7, %xmm2, (%eax)
-
-// CHECK: vpextrd $7, %xmm2, %eax
-// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0xd0,0x07]
- vpextrd $7, %xmm2, %eax
-
-// CHECK: vpextrd $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x16,0x10,0x07]
- vpextrd $7, %xmm2, (%eax)
-
-// CHECK: vpextrb $7, %xmm2, %eax
-// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xd0,0x07]
- vpextrb $7, %xmm2, %eax
-
-// CHECK: vpextrb $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0x10,0x07]
- vpextrb $7, %xmm2, (%eax)
-
-// CHECK: vextractps $7, %xmm2, (%eax)
-// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0x10,0x07]
- vextractps $7, %xmm2, (%eax)
-
-// CHECK: vextractps $7, %xmm2, %eax
-// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xd0,0x07]
- vextractps $7, %xmm2, %eax
-
-// CHECK: vpinsrw $7, %eax, %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0xc4,0xe8,0x07]
- vpinsrw $7, %eax, %xmm2, %xmm5
-
-// CHECK: vpinsrw $7, (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc5,0xe9,0xc4,0x28,0x07]
- vpinsrw $7, (%eax), %xmm2, %xmm5
-
-// CHECK: vpinsrb $7, %eax, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0xe8,0x07]
- vpinsrb $7, %eax, %xmm2, %xmm5
-
-// CHECK: vpinsrb $7, (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x20,0x28,0x07]
- vpinsrb $7, (%eax), %xmm2, %xmm5
-
-// CHECK: vpinsrd $7, %eax, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0xe8,0x07]
- vpinsrd $7, %eax, %xmm2, %xmm5
-
-// CHECK: vpinsrd $7, (%eax), %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x69,0x22,0x28,0x07]
- vpinsrd $7, (%eax), %xmm2, %xmm5
-
-// CHECK: vinsertps $7, %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0xca,0x07]
- vinsertps $7, %xmm2, %xmm5, %xmm1
-
-// CHECK: vinsertps $7, (%eax), %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe3,0x51,0x21,0x08,0x07]
- vinsertps $7, (%eax), %xmm5, %xmm1
-
-// CHECK: vptest %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0xea]
- vptest %xmm2, %xmm5
-
-// CHECK: vptest (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x17,0x10]
- vptest (%eax), %xmm2
-
-// CHECK: vmovntdqa (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0x2a,0x10]
- vmovntdqa (%eax), %xmm2
-
-// CHECK: vpcmpgtq %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0xca]
- vpcmpgtq %xmm2, %xmm5, %xmm1
-
-// CHECK: vpcmpgtq (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0x37,0x18]
- vpcmpgtq (%eax), %xmm5, %xmm3
-
-// CHECK: vpcmpistrm $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0xea,0x07]
- vpcmpistrm $7, %xmm2, %xmm5
-
-// CHECK: vpcmpistrm $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x62,0x28,0x07]
- vpcmpistrm $7, (%eax), %xmm5
-
-// CHECK: vpcmpestrm $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0xea,0x07]
- vpcmpestrm $7, %xmm2, %xmm5
-
-// CHECK: vpcmpestrm $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x60,0x28,0x07]
- vpcmpestrm $7, (%eax), %xmm5
-
-// CHECK: vpcmpistri $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0xea,0x07]
- vpcmpistri $7, %xmm2, %xmm5
-
-// CHECK: vpcmpistri $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x63,0x28,0x07]
- vpcmpistri $7, (%eax), %xmm5
-
-// CHECK: vpcmpestri $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0xea,0x07]
- vpcmpestri $7, %xmm2, %xmm5
-
-// CHECK: vpcmpestri $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0x61,0x28,0x07]
- vpcmpestri $7, (%eax), %xmm5
-
-// CHECK: vaesimc %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0xea]
- vaesimc %xmm2, %xmm5
-
-// CHECK: vaesimc (%eax), %xmm2
-// CHECK: encoding: [0xc4,0xe2,0x79,0xdb,0x10]
- vaesimc (%eax), %xmm2
-
-// CHECK: vaesenc %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0xca]
- vaesenc %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesenc (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdc,0x18]
- vaesenc (%eax), %xmm5, %xmm3
-
-// CHECK: vaesenclast %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0xca]
- vaesenclast %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesenclast (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdd,0x18]
- vaesenclast (%eax), %xmm5, %xmm3
-
-// CHECK: vaesdec %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0xca]
- vaesdec %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesdec (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xde,0x18]
- vaesdec (%eax), %xmm5, %xmm3
-
-// CHECK: vaesdeclast %xmm2, %xmm5, %xmm1
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0xca]
- vaesdeclast %xmm2, %xmm5, %xmm1
-
-// CHECK: vaesdeclast (%eax), %xmm5, %xmm3
-// CHECK: encoding: [0xc4,0xe2,0x51,0xdf,0x18]
- vaesdeclast (%eax), %xmm5, %xmm3
-
-// CHECK: vaeskeygenassist $7, %xmm2, %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0xea,0x07]
- vaeskeygenassist $7, %xmm2, %xmm5
-
-// CHECK: vaeskeygenassist $7, (%eax), %xmm5
-// CHECK: encoding: [0xc4,0xe3,0x79,0xdf,0x28,0x07]
- vaeskeygenassist $7, (%eax), %xmm5
-
-// CHECK: vcmpps $8, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x08]
- vcmpeq_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $9, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x09]
- vcmpngeps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $10, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0a]
- vcmpngtps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $11, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0b]
- vcmpfalseps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $12, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0c]
- vcmpneq_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $13, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0d]
- vcmpgeps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $14, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0e]
- vcmpgtps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $15, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x0f]
- vcmptrueps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $16, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x10]
- vcmpeq_osps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $17, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x11]
- vcmplt_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $18, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x12]
- vcmple_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $19, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x13]
- vcmpunord_sps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $20, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x14]
- vcmpneq_usps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $21, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x15]
- vcmpnlt_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $22, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x16]
- vcmpnle_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $23, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x17]
- vcmpord_sps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $24, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x18]
- vcmpeq_usps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $25, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x19]
- vcmpnge_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $26, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1a]
- vcmpngt_uqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $27, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1b]
- vcmpfalse_osps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $28, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1c]
- vcmpneq_osps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $29, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1d]
- vcmpge_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $30, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1e]
- vcmpgt_oqps %xmm1, %xmm2, %xmm3
-
-// CHECK: vcmpps $31, %xmm1, %xmm2, %xmm3
-// CHECK: encoding: [0xc5,0xe8,0xc2,0xd9,0x1f]
- vcmptrue_usps %xmm1, %xmm2, %xmm3
-
-// CHECK: vmovaps (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x28,0x10]
- vmovaps (%eax), %ymm2
-
-// CHECK: vmovaps %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x28,0xd1]
- vmovaps %ymm1, %ymm2
-
-// CHECK: vmovaps %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfc,0x29,0x08]
- vmovaps %ymm1, (%eax)
-
-// CHECK: vmovapd (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x28,0x10]
- vmovapd (%eax), %ymm2
-
-// CHECK: vmovapd %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x28,0xd1]
- vmovapd %ymm1, %ymm2
-
-// CHECK: vmovapd %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0x29,0x08]
- vmovapd %ymm1, (%eax)
-
-// CHECK: vmovups (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x10,0x10]
- vmovups (%eax), %ymm2
-
-// CHECK: vmovups %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x10,0xd1]
- vmovups %ymm1, %ymm2
-
-// CHECK: vmovups %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfc,0x11,0x08]
- vmovups %ymm1, (%eax)
-
-// CHECK: vmovupd (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x10,0x10]
- vmovupd (%eax), %ymm2
-
-// CHECK: vmovupd %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x10,0xd1]
- vmovupd %ymm1, %ymm2
-
-// CHECK: vmovupd %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0x11,0x08]
- vmovupd %ymm1, (%eax)
-
-// CHECK: vunpckhps %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xec,0x15,0xe1]
- vunpckhps %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpckhpd %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xed,0x15,0xe1]
- vunpckhpd %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpcklps %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xec,0x14,0xe1]
- vunpcklps %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpcklpd %ymm1, %ymm2, %ymm4
-// CHECK: encoding: [0xc5,0xed,0x14,0xe1]
- vunpcklpd %ymm1, %ymm2, %ymm4
-
-// CHECK: vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x15,0x6c,0xcb,0xfc]
- vunpckhps -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x15,0x6c,0xcb,0xfc]
- vunpckhpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x14,0x6c,0xcb,0xfc]
- vunpcklps -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x14,0x6c,0xcb,0xfc]
- vunpcklpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vmovntdq %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0xe7,0x08]
- vmovntdq %ymm1, (%eax)
-
-// CHECK: vmovntpd %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfd,0x2b,0x08]
- vmovntpd %ymm1, (%eax)
-
-// CHECK: vmovntps %ymm1, (%eax)
-// CHECK: encoding: [0xc5,0xfc,0x2b,0x08]
- vmovntps %ymm1, (%eax)
-
-// CHECK: vmovmskps %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf8,0x50,0xc2]
- vmovmskps %xmm2, %eax
-
-// CHECK: vmovmskpd %xmm2, %eax
-// CHECK: encoding: [0xc5,0xf9,0x50,0xc2]
- vmovmskpd %xmm2, %eax
-
-// CHECK: vmaxps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5f,0xf2]
- vmaxps %ymm2, %ymm4, %ymm6
-
-// CHECK: vmaxpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5f,0xf2]
- vmaxpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vminps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5d,0xf2]
- vminps %ymm2, %ymm4, %ymm6
-
-// CHECK: vminpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5d,0xf2]
- vminpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vsubps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5c,0xf2]
- vsubps %ymm2, %ymm4, %ymm6
-
-// CHECK: vsubpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5c,0xf2]
- vsubpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vdivps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5e,0xf2]
- vdivps %ymm2, %ymm4, %ymm6
-
-// CHECK: vdivpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5e,0xf2]
- vdivpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vaddps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x58,0xf2]
- vaddps %ymm2, %ymm4, %ymm6
-
-// CHECK: vaddpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x58,0xf2]
- vaddpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vmulps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x59,0xf2]
- vmulps %ymm2, %ymm4, %ymm6
-
-// CHECK: vmulpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x59,0xf2]
- vmulpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vmaxps (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
- vmaxps (%eax), %ymm4, %ymm6
-
-// CHECK: vmaxpd (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
- vmaxpd (%eax), %ymm4, %ymm6
-
-// CHECK: vminps (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
- vminps (%eax), %ymm4, %ymm6
-
-// CHECK: vminpd (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
- vminpd (%eax), %ymm4, %ymm6
-
-// CHECK: vsubps (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
- vsubps (%eax), %ymm4, %ymm6
-
-// CHECK: vsubpd (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
- vsubpd (%eax), %ymm4, %ymm6
-
-// CHECK: vdivps (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
- vdivps (%eax), %ymm4, %ymm6
-
-// CHECK: vdivpd (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
- vdivpd (%eax), %ymm4, %ymm6
-
-// CHECK: vaddps (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
- vaddps (%eax), %ymm4, %ymm6
-
-// CHECK: vaddpd (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
- vaddpd (%eax), %ymm4, %ymm6
-
-// CHECK: vmulps (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
- vmulps (%eax), %ymm4, %ymm6
-
-// CHECK: vmulpd (%eax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
- vmulpd (%eax), %ymm4, %ymm6
-
-// CHECK: vsqrtpd %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x51,0xd1]
- vsqrtpd %ymm1, %ymm2
-
-// CHECK: vsqrtpd (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfd,0x51,0x10]
- vsqrtpd (%eax), %ymm2
-
-// CHECK: vsqrtps %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x51,0xd1]
- vsqrtps %ymm1, %ymm2
-
-// CHECK: vsqrtps (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x51,0x10]
- vsqrtps (%eax), %ymm2
-
-// CHECK: vrsqrtps %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x52,0xd1]
- vrsqrtps %ymm1, %ymm2
-
-// CHECK: vrsqrtps (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x52,0x10]
- vrsqrtps (%eax), %ymm2
-
-// CHECK: vrcpps %ymm1, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x53,0xd1]
- vrcpps %ymm1, %ymm2
-
-// CHECK: vrcpps (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x53,0x10]
- vrcpps (%eax), %ymm2
-
-// CHECK: vandps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x54,0xf2]
- vandps %ymm2, %ymm4, %ymm6
-
-// CHECK: vandpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x54,0xf2]
- vandpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vandps -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x54,0x6c,0xcb,0xfc]
- vandps -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x54,0x6c,0xcb,0xfc]
- vandpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vorps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x56,0xf2]
- vorps %ymm2, %ymm4, %ymm6
-
-// CHECK: vorpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x56,0xf2]
- vorpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vorps -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x56,0x6c,0xcb,0xfc]
- vorps -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x56,0x6c,0xcb,0xfc]
- vorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vxorps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x57,0xf2]
- vxorps %ymm2, %ymm4, %ymm6
-
-// CHECK: vxorpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x57,0xf2]
- vxorpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x57,0x6c,0xcb,0xfc]
- vxorps -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x57,0x6c,0xcb,0xfc]
- vxorpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vandnps %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x55,0xf2]
- vandnps %ymm2, %ymm4, %ymm6
-
-// CHECK: vandnpd %ymm2, %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x55,0xf2]
- vandnpd %ymm2, %ymm4, %ymm6
-
-// CHECK: vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xec,0x55,0x6c,0xcb,0xfc]
- vandnps -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xed,0x55,0x6c,0xcb,0xfc]
- vandnpd -4(%ebx,%ecx,8), %ymm2, %ymm5
-
-// CHECK: vcvtps2pd %xmm3, %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x5a,0xd3]
- vcvtps2pd %xmm3, %ymm2
-
-// CHECK: vcvtps2pd (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x5a,0x10]
- vcvtps2pd (%eax), %ymm2
-
-// CHECK: vcvtdq2pd %xmm3, %ymm2
-// CHECK: encoding: [0xc5,0xfe,0xe6,0xd3]
- vcvtdq2pd %xmm3, %ymm2
-
-// CHECK: vcvtdq2pd (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfe,0xe6,0x10]
- vcvtdq2pd (%eax), %ymm2
-
-// CHECK: vcvtdq2ps %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfc,0x5b,0xea]
- vcvtdq2ps %ymm2, %ymm5
-
-// CHECK: vcvtdq2ps (%eax), %ymm2
-// CHECK: encoding: [0xc5,0xfc,0x5b,0x10]
- vcvtdq2ps (%eax), %ymm2
-
-// CHECK: vcvtps2dq %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfd,0x5b,0xea]
- vcvtps2dq %ymm2, %ymm5
-
-// CHECK: vcvtps2dq (%eax), %ymm5
-// CHECK: encoding: [0xc5,0xfd,0x5b,0x28]
- vcvtps2dq (%eax), %ymm5
-
-// CHECK: vcvttps2dq %ymm2, %ymm5
-// CHECK: encoding: [0xc5,0xfe,0x5b,0xea]
- vcvttps2dq %ymm2, %ymm5
-
-// CHECK: vcvttps2dq (%eax), %ymm5
-// CHECK: encoding: [0xc5,0xfe,0x5b,0x28]
- vcvttps2dq (%eax), %ymm5
-
-// CHECK: vcvttpd2dq %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
- vcvttpd2dq %xmm1, %xmm5
-
-// CHECK: vcvttpd2dq %ymm2, %xmm5
-// CHECK: encoding: [0xc5,0xfd,0xe6,0xea]
- vcvttpd2dq %ymm2, %xmm5
-
-// CHECK: vcvttpd2dqx %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xf9,0xe6,0xe9]
- vcvttpd2dqx %xmm1, %xmm5
-
-// CHECK: vcvttpd2dqx (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xf9,0xe6,0x08]
- vcvttpd2dqx (%eax), %xmm1
-
-// CHECK: vcvttpd2dqy %ymm2, %xmm1
-// CHECK: encoding: [0xc5,0xfd,0xe6,0xca]
- vcvttpd2dqy %ymm2, %xmm1
-
-// CHECK: vcvttpd2dqy (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfd,0xe6,0x08]
- vcvttpd2dqy (%eax), %xmm1
-
-// CHECK: vcvtpd2ps %ymm2, %xmm5
-// CHECK: encoding: [0xc5,0xfd,0x5a,0xea]
- vcvtpd2ps %ymm2, %xmm5
-
-// CHECK: vcvtpd2psx %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xf9,0x5a,0xe9]
- vcvtpd2psx %xmm1, %xmm5
-
-// CHECK: vcvtpd2psx (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xf9,0x5a,0x08]
- vcvtpd2psx (%eax), %xmm1
-
-// CHECK: vcvtpd2psy %ymm2, %xmm1
-// CHECK: encoding: [0xc5,0xfd,0x5a,0xca]
- vcvtpd2psy %ymm2, %xmm1
-
-// CHECK: vcvtpd2psy (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfd,0x5a,0x08]
- vcvtpd2psy (%eax), %xmm1
-
-// CHECK: vcvtpd2dq %ymm2, %xmm5
-// CHECK: encoding: [0xc5,0xff,0xe6,0xea]
- vcvtpd2dq %ymm2, %xmm5
-
-// CHECK: vcvtpd2dqy %ymm2, %xmm1
-// CHECK: encoding: [0xc5,0xff,0xe6,0xca]
- vcvtpd2dqy %ymm2, %xmm1
-
-// CHECK: vcvtpd2dqy (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xff,0xe6,0x08]
- vcvtpd2dqy (%eax), %xmm1
-
-// CHECK: vcvtpd2dqx %xmm1, %xmm5
-// CHECK: encoding: [0xc5,0xfb,0xe6,0xe9]
- vcvtpd2dqx %xmm1, %xmm5
-
-// CHECK: vcvtpd2dqx (%eax), %xmm1
-// CHECK: encoding: [0xc5,0xfb,0xe6,0x08]
- vcvtpd2dqx (%eax), %xmm1
-
-// CHECK: vcmpps $0, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x00]
- vcmpeqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $2, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x02]
- vcmpleps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $1, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x01]
- vcmpltps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $4, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x04]
- vcmpneqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $6, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x06]
- vcmpnleps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $5, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x05]
- vcmpnltps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $7, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x07]
- vcmpordps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $3, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x03]
- vcmpunordps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x00]
- vcmpeqps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x02]
- vcmpleps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x01]
- vcmpltps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x04]
- vcmpneqps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x06]
- vcmpnleps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x05]
- vcmpnltps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
-// CHECK: encoding: [0xc5,0xcc,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordps -4(%ebx,%ecx,8), %ymm6, %ymm2
-
-// CHECK: vcmpps $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0x5c,0xcb,0xfc,0x03]
- vcmpunordps -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd $0, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x00]
- vcmpeqpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $2, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x02]
- vcmplepd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $1, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x01]
- vcmpltpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $4, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x04]
- vcmpneqpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $6, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x06]
- vcmpnlepd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $5, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x05]
- vcmpnltpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $7, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x07]
- vcmpordpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $3, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0xd9,0x03]
- vcmpunordpd %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmppd $0, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x00]
- vcmpeqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd $2, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x02]
- vcmplepd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd $1, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x01]
- vcmpltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd $4, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x04]
- vcmpneqpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd $6, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x06]
- vcmpnlepd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd $5, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x05]
- vcmpnltpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmppd $7, -4(%ebx,%ecx,8), %ymm6, %ymm2
-// CHECK: encoding: [0xc5,0xcd,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordpd -4(%ebx,%ecx,8), %ymm6, %ymm2
-
-// CHECK: vcmppd $3, -4(%ebx,%ecx,8), %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xed,0xc2,0x5c,0xcb,0xfc,0x03]
- vcmpunordpd -4(%ebx,%ecx,8), %ymm2, %ymm3
-
-// CHECK: vcmpps $8, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x08]
- vcmpeq_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $9, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x09]
- vcmpngeps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $10, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0a]
- vcmpngtps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $11, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0b]
- vcmpfalseps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $12, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0c]
- vcmpneq_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $13, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0d]
- vcmpgeps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $14, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0e]
- vcmpgtps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $15, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x0f]
- vcmptrueps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $16, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x10]
- vcmpeq_osps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $17, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x11]
- vcmplt_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $18, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x12]
- vcmple_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $19, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x13]
- vcmpunord_sps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $20, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x14]
- vcmpneq_usps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $21, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x15]
- vcmpnlt_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $22, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x16]
- vcmpnle_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $23, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x17]
- vcmpord_sps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $24, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x18]
- vcmpeq_usps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $25, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x19]
- vcmpnge_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $26, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1a]
- vcmpngt_uqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $27, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1b]
- vcmpfalse_osps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $28, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1c]
- vcmpneq_osps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $29, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1d]
- vcmpge_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $30, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1e]
- vcmpgt_oqps %ymm1, %ymm2, %ymm3
-
-// CHECK: vcmpps $31, %ymm1, %ymm2, %ymm3
-// CHECK: encoding: [0xc5,0xec,0xc2,0xd9,0x1f]
- vcmptrue_usps %ymm1, %ymm2, %ymm3
+// CHECK: movl (%ecx,%eiz,8), %eax
+// CHECK: encoding: [0x8b,0x04,0xe1]
+ movl (%ecx,%eiz,8), %eax
diff --git a/test/MC/AsmParser/X86/x86_32-fma3-encoding.s b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s
new file mode 100644
index 000000000000..db7efecfb51b
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_32-fma3-encoding.s
@@ -0,0 +1,674 @@
+// RUN: llvm-mc -triple i386-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca]
+ vfmadd132pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08]
+ vfmadd132pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca]
+ vfmadd132ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08]
+ vfmadd132ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca]
+ vfmadd213pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08]
+ vfmadd213pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca]
+ vfmadd213ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08]
+ vfmadd213ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca]
+ vfmadd231pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08]
+ vfmadd231pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca]
+ vfmadd231ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08]
+ vfmadd231ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca]
+ vfmadd132pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08]
+ vfmadd132pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca]
+ vfmadd132ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08]
+ vfmadd132ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca]
+ vfmadd213pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08]
+ vfmadd213pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca]
+ vfmadd213ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08]
+ vfmadd213ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca]
+ vfmadd231pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08]
+ vfmadd231pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca]
+ vfmadd231ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08]
+ vfmadd231ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd132pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0xca]
+ vfmadd132pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x98,0x08]
+ vfmadd132pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0xca]
+ vfmadd132ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd132ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x98,0x08]
+ vfmadd132ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0xca]
+ vfmadd213pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa8,0x08]
+ vfmadd213pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd213ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0xca]
+ vfmadd213ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd213ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa8,0x08]
+ vfmadd213ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0xca]
+ vfmadd231pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb8,0x08]
+ vfmadd231pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd231ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0xca]
+ vfmadd231ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmadd231ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb8,0x08]
+ vfmadd231ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub132pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0xca]
+ vfmaddsub132pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub132pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x96,0x08]
+ vfmaddsub132pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub132ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0xca]
+ vfmaddsub132ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub132ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x96,0x08]
+ vfmaddsub132ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub213pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0xca]
+ vfmaddsub213pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub213pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa6,0x08]
+ vfmaddsub213pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub213ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0xca]
+ vfmaddsub213ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub213ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa6,0x08]
+ vfmaddsub213ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub231pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0xca]
+ vfmaddsub231pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub231pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb6,0x08]
+ vfmaddsub231pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmaddsub231ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0xca]
+ vfmaddsub231ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmaddsub231ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb6,0x08]
+ vfmaddsub231ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd132pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0xca]
+ vfmsubadd132pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd132pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x97,0x08]
+ vfmsubadd132pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd132ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0xca]
+ vfmsubadd132ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd132ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x97,0x08]
+ vfmsubadd132ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd213pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0xca]
+ vfmsubadd213pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd213pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xa7,0x08]
+ vfmsubadd213pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd213ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0xca]
+ vfmsubadd213ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd213ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xa7,0x08]
+ vfmsubadd213ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd231pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0xca]
+ vfmsubadd231pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd231pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xb7,0x08]
+ vfmsubadd231pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsubadd231ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0xca]
+ vfmsubadd231ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsubadd231ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xb7,0x08]
+ vfmsubadd231ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub132pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0xca]
+ vfmsub132pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub132pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9a,0x08]
+ vfmsub132pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub132ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0xca]
+ vfmsub132ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub132ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9a,0x08]
+ vfmsub132ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub213pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0xca]
+ vfmsub213pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub213pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xaa,0x08]
+ vfmsub213pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub213ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0xca]
+ vfmsub213ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub213ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xaa,0x08]
+ vfmsub213ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub231pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0xca]
+ vfmsub231pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub231pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xba,0x08]
+ vfmsub231pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfmsub231ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0xca]
+ vfmsub231ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfmsub231ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xba,0x08]
+ vfmsub231ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd132pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0xca]
+ vfnmadd132pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd132pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9c,0x08]
+ vfnmadd132pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd132ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0xca]
+ vfnmadd132ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd132ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9c,0x08]
+ vfnmadd132ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd213pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0xca]
+ vfnmadd213pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd213pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xac,0x08]
+ vfnmadd213pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd213ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0xca]
+ vfnmadd213ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd213ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xac,0x08]
+ vfnmadd213ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd231pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0xca]
+ vfnmadd231pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd231pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbc,0x08]
+ vfnmadd231pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmadd231ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0xca]
+ vfnmadd231ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmadd231ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbc,0x08]
+ vfnmadd231ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub132pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0xca]
+ vfnmsub132pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub132pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0x9e,0x08]
+ vfnmsub132pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub132ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0xca]
+ vfnmsub132ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub132ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0x9e,0x08]
+ vfnmsub132ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub213pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0xca]
+ vfnmsub213pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub213pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xae,0x08]
+ vfnmsub213pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub213ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0xca]
+ vfnmsub213ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub213ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xae,0x08]
+ vfnmsub213ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub231pd %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0xca]
+ vfnmsub231pd %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub231pd (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0xd1,0xbe,0x08]
+ vfnmsub231pd (%eax), %xmm5, %xmm1
+
+// CHECK: vfnmsub231ps %xmm2, %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0xca]
+ vfnmsub231ps %xmm2, %xmm5, %xmm1
+
+// CHECK: vfnmsub231ps (%eax), %xmm5, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x51,0xbe,0x08]
+ vfnmsub231ps (%eax), %xmm5, %xmm1
+
+// CHECK: vfmadd132pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0xca]
+ vfmadd132pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x98,0x08]
+ vfmadd132pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd132ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0xca]
+ vfmadd132ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd132ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x98,0x08]
+ vfmadd132ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0xca]
+ vfmadd213pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa8,0x08]
+ vfmadd213pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd213ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0xca]
+ vfmadd213ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd213ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa8,0x08]
+ vfmadd213ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0xca]
+ vfmadd231pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb8,0x08]
+ vfmadd231pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmadd231ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0xca]
+ vfmadd231ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmadd231ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb8,0x08]
+ vfmadd231ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub132pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0xca]
+ vfmaddsub132pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub132pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x96,0x08]
+ vfmaddsub132pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub132ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0xca]
+ vfmaddsub132ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub132ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x96,0x08]
+ vfmaddsub132ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub213pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0xca]
+ vfmaddsub213pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub213pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa6,0x08]
+ vfmaddsub213pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub213ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0xca]
+ vfmaddsub213ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub213ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa6,0x08]
+ vfmaddsub213ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub231pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0xca]
+ vfmaddsub231pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub231pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb6,0x08]
+ vfmaddsub231pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmaddsub231ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0xca]
+ vfmaddsub231ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmaddsub231ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb6,0x08]
+ vfmaddsub231ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd132pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0xca]
+ vfmsubadd132pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd132pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x97,0x08]
+ vfmsubadd132pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd132ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0xca]
+ vfmsubadd132ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd132ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x97,0x08]
+ vfmsubadd132ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd213pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0xca]
+ vfmsubadd213pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd213pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xa7,0x08]
+ vfmsubadd213pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd213ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0xca]
+ vfmsubadd213ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd213ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xa7,0x08]
+ vfmsubadd213ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd231pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0xca]
+ vfmsubadd231pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd231pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xb7,0x08]
+ vfmsubadd231pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsubadd231ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0xca]
+ vfmsubadd231ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsubadd231ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xb7,0x08]
+ vfmsubadd231ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub132pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0xca]
+ vfmsub132pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub132pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9a,0x08]
+ vfmsub132pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub132ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0xca]
+ vfmsub132ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub132ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9a,0x08]
+ vfmsub132ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub213pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0xca]
+ vfmsub213pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub213pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xaa,0x08]
+ vfmsub213pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub213ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0xca]
+ vfmsub213ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub213ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xaa,0x08]
+ vfmsub213ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub231pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0xca]
+ vfmsub231pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub231pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xba,0x08]
+ vfmsub231pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfmsub231ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0xca]
+ vfmsub231ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfmsub231ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xba,0x08]
+ vfmsub231ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd132pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0xca]
+ vfnmadd132pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd132pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9c,0x08]
+ vfnmadd132pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd132ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0xca]
+ vfnmadd132ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd132ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9c,0x08]
+ vfnmadd132ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd213pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0xca]
+ vfnmadd213pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd213pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xac,0x08]
+ vfnmadd213pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd213ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0xca]
+ vfnmadd213ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd213ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xac,0x08]
+ vfnmadd213ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd231pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0xca]
+ vfnmadd231pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd231pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbc,0x08]
+ vfnmadd231pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmadd231ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0xca]
+ vfnmadd231ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmadd231ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbc,0x08]
+ vfnmadd231ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub132pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0xca]
+ vfnmsub132pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub132pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0x9e,0x08]
+ vfnmsub132pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub132ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0xca]
+ vfnmsub132ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub132ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0x9e,0x08]
+ vfnmsub132ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub213pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0xca]
+ vfnmsub213pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub213pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xae,0x08]
+ vfnmsub213pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub213ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0xca]
+ vfnmsub213ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub213ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xae,0x08]
+ vfnmsub213ps (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub231pd %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0xca]
+ vfnmsub231pd %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub231pd (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0xd5,0xbe,0x08]
+ vfnmsub231pd (%eax), %ymm5, %ymm1
+
+// CHECK: vfnmsub231ps %ymm2, %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0xca]
+ vfnmsub231ps %ymm2, %ymm5, %ymm1
+
+// CHECK: vfnmsub231ps (%eax), %ymm5, %ymm1
+// CHECK: encoding: [0xc4,0xe2,0x55,0xbe,0x08]
+ vfnmsub231ps (%eax), %ymm5, %ymm1
+
diff --git a/test/MC/AsmParser/X86/x86_32-new-encoder.s b/test/MC/AsmParser/X86/x86_32-new-encoder.s
index e97e4940a42c..e3aa1887ef81 100644
--- a/test/MC/AsmParser/X86/x86_32-new-encoder.s
+++ b/test/MC/AsmParser/X86/x86_32-new-encoder.s
@@ -415,3 +415,11 @@ retl
// CHECK: encoding: [0x61]
popal
+// CHECK: jmpl *8(%eax)
+// CHECK: encoding: [0xff,0x60,0x08]
+ jmp *8(%eax)
+
+// PR7465
+// CHECK: lcalll $2, $4660
+// CHECK: encoding: [0x9a,0x34,0x12,0x00,0x00,0x02,0x00]
+lcalll $0x2, $0x1234
diff --git a/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s
new file mode 100644
index 000000000000..67e82c6cd0d2
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_64-avx-clmul-encoding.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11]
+ vpclmulhqhqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11]
+ vpclmulhqhqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq $1, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x01]
+ vpclmulhqlqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq $1, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x01]
+ vpclmulhqlqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq $16, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x10]
+ vpclmullqhqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq $16, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x10]
+ vpclmullqhqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq $0, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x00]
+ vpclmullqlqdq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq $0, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x00]
+ vpclmullqlqdq (%rax), %xmm10, %xmm13
+
+// CHECK: vpclmulqdq $17, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x44,0xdc,0x11]
+ vpclmulqdq $17, %xmm12, %xmm10, %xmm11
+
+// CHECK: vpclmulqdq $17, (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x29,0x44,0x28,0x11]
+ vpclmulqdq $17, (%rax), %xmm10, %xmm13
+
diff --git a/test/MC/AsmParser/X86/x86_64-avx-encoding.s b/test/MC/AsmParser/X86/x86_64-avx-encoding.s
new file mode 100644
index 000000000000..7a96bb5a2b48
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_64-avx-encoding.s
@@ -0,0 +1,3318 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vaddss %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0]
+vaddss %xmm8, %xmm9, %xmm10
+
+// CHECK: vmulss %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0]
+vmulss %xmm8, %xmm9, %xmm10
+
+// CHECK: vsubss %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0]
+vsubss %xmm8, %xmm9, %xmm10
+
+// CHECK: vdivss %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0]
+vdivss %xmm8, %xmm9, %xmm10
+
+// CHECK: vaddsd %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0]
+vaddsd %xmm8, %xmm9, %xmm10
+
+// CHECK: vmulsd %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0]
+vmulsd %xmm8, %xmm9, %xmm10
+
+// CHECK: vsubsd %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0]
+vsubsd %xmm8, %xmm9, %xmm10
+
+// CHECK: vdivsd %xmm8, %xmm9, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0]
+vdivsd %xmm8, %xmm9, %xmm10
+
+// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc]
+vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc]
+vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc]
+vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc]
+vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc]
+vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc]
+vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc]
+vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc]
+vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vaddps %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa]
+vaddps %xmm10, %xmm11, %xmm15
+
+// CHECK: vsubps %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa]
+vsubps %xmm10, %xmm11, %xmm15
+
+// CHECK: vmulps %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa]
+vmulps %xmm10, %xmm11, %xmm15
+
+// CHECK: vdivps %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa]
+vdivps %xmm10, %xmm11, %xmm15
+
+// CHECK: vaddpd %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa]
+vaddpd %xmm10, %xmm11, %xmm15
+
+// CHECK: vsubpd %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa]
+vsubpd %xmm10, %xmm11, %xmm15
+
+// CHECK: vmulpd %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa]
+vmulpd %xmm10, %xmm11, %xmm15
+
+// CHECK: vdivpd %xmm10, %xmm11, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa]
+vdivpd %xmm10, %xmm11, %xmm15
+
+// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc]
+vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc]
+vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc]
+vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc]
+vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc]
+vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc]
+vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc]
+vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc]
+vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
+
+// CHECK: vmaxss %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2]
+ vmaxss %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxsd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2]
+ vmaxsd %xmm10, %xmm14, %xmm12
+
+// CHECK: vminss %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2]
+ vminss %xmm10, %xmm14, %xmm12
+
+// CHECK: vminsd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2]
+ vminsd %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc]
+ vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc]
+ vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc]
+ vminss -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
+ vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
+ vmaxps %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
+ vmaxpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vminps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
+ vminps %xmm10, %xmm14, %xmm12
+
+// CHECK: vminpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
+ vminpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
+ vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
+ vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
+ vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
+ vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2]
+ vandps %xmm10, %xmm14, %xmm12
+
+// CHECK: vandpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2]
+ vandpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc]
+ vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc]
+ vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2]
+ vorps %xmm10, %xmm14, %xmm12
+
+// CHECK: vorpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2]
+ vorpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc]
+ vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc]
+ vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2]
+ vxorps %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2]
+ vxorpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc]
+ vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc]
+ vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnps %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2]
+ vandnps %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnpd %xmm10, %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2]
+ vandnpd %xmm10, %xmm14, %xmm12
+
+// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc]
+ vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
+ vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10
+
+// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc]
+ vmovss -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovss %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe]
+ vmovss %xmm14, %xmm10, %xmm15
+
+// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10
+// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc]
+ vmovsd -4(%rbx,%rcx,8), %xmm10
+
+// CHECK: vmovsd %xmm14, %xmm10, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe]
+ vmovsd %xmm14, %xmm10, %xmm15
+
+// CHECK: vunpckhps %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef]
+ vunpckhps %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef]
+ vunpckhpd %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklps %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef]
+ vunpcklps %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef]
+ vunpcklpd %xmm15, %xmm12, %xmm13
+
+// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc]
+ vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc]
+ vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc]
+ vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc]
+ vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15
+
+// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00]
+ vcmpps $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00]
+ vcmpps $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07]
+ vcmpps $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00]
+ vcmppd $0, %xmm10, %xmm12, %xmm15
+
+// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15
+// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00]
+ vcmppd $0, (%rax), %xmm12, %xmm15
+
+// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07]
+ vcmppd $7, %xmm10, %xmm12, %xmm15
+
+// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08]
+ vshufps $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08]
+ vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08]
+ vshufpd $8, %xmm11, %xmm12, %xmm13
+
+// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08]
+ vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00]
+ vcmpeqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02]
+ vcmpleps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01]
+ vcmpltps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04]
+ vcmpneqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06]
+ vcmpnleps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05]
+ vcmpnltps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07]
+ vcmpordps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03]
+ vcmpunordps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00]
+ vcmpeqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02]
+ vcmplepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01]
+ vcmpltpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04]
+ vcmpneqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06]
+ vcmpnlepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05]
+ vcmpnltpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07]
+ vcmpordpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03]
+ vcmpunordpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00]
+ vcmpeqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02]
+ vcmpless %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01]
+ vcmpltss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04]
+ vcmpneqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06]
+ vcmpnless %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05]
+ vcmpnltss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07]
+ vcmpordss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03]
+ vcmpunordss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00]
+ vcmpeqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02]
+ vcmplesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01]
+ vcmpltsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04]
+ vcmpneqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06]
+ vcmpnlesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05]
+ vcmpnltsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07]
+ vcmpordsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03]
+ vcmpunordsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
+ vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vucomiss %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
+ vucomiss %xmm11, %xmm12
+
+// CHECK: vucomiss (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2e,0x20]
+ vucomiss (%rax), %xmm12
+
+// CHECK: vcomiss %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3]
+ vcomiss %xmm11, %xmm12
+
+// CHECK: vcomiss (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x2f,0x20]
+ vcomiss (%rax), %xmm12
+
+// CHECK: vucomisd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3]
+ vucomisd %xmm11, %xmm12
+
+// CHECK: vucomisd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2e,0x20]
+ vucomisd (%rax), %xmm12
+
+// CHECK: vcomisd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3]
+ vcomisd %xmm11, %xmm12
+
+// CHECK: vcomisd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x2f,0x20]
+ vcomisd (%rax), %xmm12
+
+// CHECK: vcvttss2si (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
+ vcvttss2si (%rcx), %eax
+
+// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+ vcvtsi2ss (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
+ vcvtsi2ss (%rax), %xmm11, %xmm12
+
+// CHECK: vcvttsd2si (%rcx), %eax
+// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
+ vcvttsd2si (%rcx), %eax
+
+// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+ vcvtsi2sd (%rax), %xmm11, %xmm12
+
+// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
+ vcvtsi2sd (%rax), %xmm11, %xmm12
+
+// CHECK: vmovaps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x28,0x20]
+ vmovaps (%rax), %xmm12
+
+// CHECK: vmovaps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
+ vmovaps %xmm11, %xmm12
+
+// CHECK: vmovaps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x29,0x18]
+ vmovaps %xmm11, (%rax)
+
+// CHECK: vmovapd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x28,0x20]
+ vmovapd (%rax), %xmm12
+
+// CHECK: vmovapd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
+ vmovapd %xmm11, %xmm12
+
+// CHECK: vmovapd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x29,0x18]
+ vmovapd %xmm11, (%rax)
+
+// CHECK: vmovups (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x10,0x20]
+ vmovups (%rax), %xmm12
+
+// CHECK: vmovups %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
+ vmovups %xmm11, %xmm12
+
+// CHECK: vmovups %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x11,0x18]
+ vmovups %xmm11, (%rax)
+
+// CHECK: vmovupd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x10,0x20]
+ vmovupd (%rax), %xmm12
+
+// CHECK: vmovupd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
+ vmovupd %xmm11, %xmm12
+
+// CHECK: vmovupd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x11,0x18]
+ vmovupd %xmm11, (%rax)
+
+// CHECK: vmovlps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x13,0x18]
+ vmovlps %xmm11, (%rax)
+
+// CHECK: vmovlps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x12,0x28]
+ vmovlps (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x13,0x18]
+ vmovlpd %xmm11, (%rax)
+
+// CHECK: vmovlpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x12,0x28]
+ vmovlpd (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x17,0x18]
+ vmovhps %xmm11, (%rax)
+
+// CHECK: vmovhps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0x16,0x28]
+ vmovhps (%rax), %xmm12, %xmm13
+
+// CHECK: vmovhpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x17,0x18]
+ vmovhpd %xmm11, (%rax)
+
+// CHECK: vmovhpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x16,0x28]
+ vmovhpd (%rax), %xmm12, %xmm13
+
+// CHECK: vmovlhps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
+ vmovlhps %xmm11, %xmm12, %xmm13
+
+// CHECK: vmovhlps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
+ vmovhlps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcvtss2sil %xmm11, %eax
+// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3]
+ vcvtss2si %xmm11, %eax
+
+// CHECK: vcvtss2sil (%rax), %ebx
+// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
+ vcvtss2si (%rax), %ebx
+
+// CHECK: vcvtdq2ps %xmm10, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2]
+ vcvtdq2ps %xmm10, %xmm12
+
+// CHECK: vcvtdq2ps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x5b,0x20]
+ vcvtdq2ps (%rax), %xmm12
+
+// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4]
+ vcvtsd2ss %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x13,0x5a,0x10]
+ vcvtsd2ss (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtps2dq %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc]
+ vcvtps2dq %xmm12, %xmm11
+
+// CHECK: vcvtps2dq (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x5b,0x18]
+ vcvtps2dq (%rax), %xmm11
+
+// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4]
+ vcvtss2sd %xmm12, %xmm13, %xmm10
+
+// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10
+// CHECK: encoding: [0xc5,0x12,0x5a,0x10]
+ vcvtss2sd (%rax), %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps %xmm13, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5]
+ vcvtdq2ps %xmm13, %xmm10
+
+// CHECK: vcvtdq2ps (%ecx), %xmm13
+// CHECK: encoding: [0xc5,0x78,0x5b,0x29]
+ vcvtdq2ps (%ecx), %xmm13
+
+// CHECK: vcvttps2dq %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc]
+ vcvttps2dq %xmm12, %xmm11
+
+// CHECK: vcvttps2dq (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7a,0x5b,0x18]
+ vcvttps2dq (%rax), %xmm11
+
+// CHECK: vcvtps2pd %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc]
+ vcvtps2pd %xmm12, %xmm11
+
+// CHECK: vcvtps2pd (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x78,0x5a,0x18]
+ vcvtps2pd (%rax), %xmm11
+
+// CHECK: vcvtpd2ps %xmm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
+ vcvtpd2ps %xmm12, %xmm11
+
+// CHECK: vsqrtpd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
+ vsqrtpd %xmm11, %xmm12
+
+// CHECK: vsqrtpd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x79,0x51,0x20]
+ vsqrtpd (%rax), %xmm12
+
+// CHECK: vsqrtps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
+ vsqrtps %xmm11, %xmm12
+
+// CHECK: vsqrtps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x51,0x20]
+ vsqrtps (%rax), %xmm12
+
+// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
+ vsqrtsd %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtsd (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
+ vsqrtsd (%rax), %xmm12, %xmm10
+
+// CHECK: vsqrtss %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
+ vsqrtss %xmm11, %xmm12, %xmm10
+
+// CHECK: vsqrtss (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
+ vsqrtss (%rax), %xmm12, %xmm10
+
+// CHECK: vrsqrtps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
+ vrsqrtps %xmm11, %xmm12
+
+// CHECK: vrsqrtps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x52,0x20]
+ vrsqrtps (%rax), %xmm12
+
+// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
+ vrsqrtss %xmm11, %xmm12, %xmm10
+
+// CHECK: vrsqrtss (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
+ vrsqrtss (%rax), %xmm12, %xmm10
+
+// CHECK: vrcpps %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
+ vrcpps %xmm11, %xmm12
+
+// CHECK: vrcpps (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x78,0x53,0x20]
+ vrcpps (%rax), %xmm12
+
+// CHECK: vrcpss %xmm11, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
+ vrcpss %xmm11, %xmm12, %xmm10
+
+// CHECK: vrcpss (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
+ vrcpss (%rax), %xmm12, %xmm10
+
+// CHECK: vmovntdq %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xe7,0x18]
+ vmovntdq %xmm11, (%rax)
+
+// CHECK: vmovntpd %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x2b,0x18]
+ vmovntpd %xmm11, (%rax)
+
+// CHECK: vmovntps %xmm11, (%rax)
+// CHECK: encoding: [0xc5,0x78,0x2b,0x18]
+ vmovntps %xmm11, (%rax)
+
+// CHECK: vldmxcsr -4(%rip)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff]
+ vldmxcsr -4(%rip)
+
+// CHECK: vstmxcsr -4(%rsp)
+// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
+ vstmxcsr -4(%rsp)
+
+// CHECK: vpsubb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb]
+ vpsubb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf8,0x28]
+ vpsubb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb]
+ vpsubw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf9,0x28]
+ vpsubw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb]
+ vpsubd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfa,0x28]
+ vpsubd (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb]
+ vpsubq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfb,0x28]
+ vpsubq (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb]
+ vpsubsb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe8,0x28]
+ vpsubsb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb]
+ vpsubsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe9,0x28]
+ vpsubsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb]
+ vpsubusb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd8,0x28]
+ vpsubusb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsubusw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb]
+ vpsubusw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsubusw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd9,0x28]
+ vpsubusw (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb]
+ vpaddb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfc,0x28]
+ vpaddb (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb]
+ vpaddw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfd,0x28]
+ vpaddw (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb]
+ vpaddd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xfe,0x28]
+ vpaddd (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb]
+ vpaddq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd4,0x28]
+ vpaddq (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb]
+ vpaddsb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xec,0x28]
+ vpaddsb (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb]
+ vpaddsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xed,0x28]
+ vpaddsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb]
+ vpaddusb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdc,0x28]
+ vpaddusb (%rax), %xmm12, %xmm13
+
+// CHECK: vpaddusw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb]
+ vpaddusw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpaddusw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdd,0x28]
+ vpaddusw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb]
+ vpmulhuw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhuw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe4,0x28]
+ vpmulhuw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb]
+ vpmulhw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe5,0x28]
+ vpmulhw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmullw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb]
+ vpmullw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmullw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd5,0x28]
+ vpmullw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmuludq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb]
+ vpmuludq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmuludq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf4,0x28]
+ vpmuludq (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb]
+ vpavgb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe0,0x28]
+ vpavgb (%rax), %xmm12, %xmm13
+
+// CHECK: vpavgw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb]
+ vpavgw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpavgw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe3,0x28]
+ vpavgw (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb]
+ vpminsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xea,0x28]
+ vpminsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpminub %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb]
+ vpminub %xmm11, %xmm12, %xmm13
+
+// CHECK: vpminub (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xda,0x28]
+ vpminub (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb]
+ vpmaxsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xee,0x28]
+ vpmaxsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxub %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb]
+ vpmaxub %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaxub (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xde,0x28]
+ vpmaxub (%rax), %xmm12, %xmm13
+
+// CHECK: vpsadbw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb]
+ vpsadbw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsadbw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
+ vpsadbw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb]
+ vpsllw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf1,0x28]
+ vpsllw (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb]
+ vpslld %xmm11, %xmm12, %xmm13
+
+// CHECK: vpslld (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf2,0x28]
+ vpslld (%rax), %xmm12, %xmm13
+
+// CHECK: vpsllq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb]
+ vpsllq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsllq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xf3,0x28]
+ vpsllq (%rax), %xmm12, %xmm13
+
+// CHECK: vpsraw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb]
+ vpsraw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsraw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe1,0x28]
+ vpsraw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrad %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb]
+ vpsrad %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrad (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xe2,0x28]
+ vpsrad (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb]
+ vpsrlw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd1,0x28]
+ vpsrlw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrld %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb]
+ vpsrld %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrld (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd2,0x28]
+ vpsrld (%rax), %xmm12, %xmm13
+
+// CHECK: vpsrlq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb]
+ vpsrlq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsrlq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xd3,0x28]
+ vpsrlq (%rax), %xmm12, %xmm13
+
+// CHECK: vpslld $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+ vpslld $10, %xmm12, %xmm13
+
+// CHECK: vpslldq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a]
+ vpslldq $10, %xmm12, %xmm13
+
+// CHECK: vpsllq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a]
+ vpsllq $10, %xmm12, %xmm13
+
+// CHECK: vpsllw $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a]
+ vpsllw $10, %xmm12, %xmm13
+
+// CHECK: vpsrad $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a]
+ vpsrad $10, %xmm12, %xmm13
+
+// CHECK: vpsraw $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a]
+ vpsraw $10, %xmm12, %xmm13
+
+// CHECK: vpsrld $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a]
+ vpsrld $10, %xmm12, %xmm13
+
+// CHECK: vpsrldq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a]
+ vpsrldq $10, %xmm12, %xmm13
+
+// CHECK: vpsrlq $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a]
+ vpsrlq $10, %xmm12, %xmm13
+
+// CHECK: vpsrlw $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a]
+ vpsrlw $10, %xmm12, %xmm13
+
+// CHECK: vpslld $10, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
+ vpslld $10, %xmm12, %xmm13
+
+// CHECK: vpand %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb]
+ vpand %xmm11, %xmm12, %xmm13
+
+// CHECK: vpand (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdb,0x28]
+ vpand (%rax), %xmm12, %xmm13
+
+// CHECK: vpor %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb]
+ vpor %xmm11, %xmm12, %xmm13
+
+// CHECK: vpor (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xeb,0x28]
+ vpor (%rax), %xmm12, %xmm13
+
+// CHECK: vpxor %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb]
+ vpxor %xmm11, %xmm12, %xmm13
+
+// CHECK: vpxor (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xef,0x28]
+ vpxor (%rax), %xmm12, %xmm13
+
+// CHECK: vpandn %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb]
+ vpandn %xmm11, %xmm12, %xmm13
+
+// CHECK: vpandn (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xdf,0x28]
+ vpandn (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb]
+ vpcmpeqb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x74,0x28]
+ vpcmpeqb (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb]
+ vpcmpeqw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x75,0x28]
+ vpcmpeqw (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb]
+ vpcmpeqd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x76,0x28]
+ vpcmpeqd (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb]
+ vpcmpgtb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x64,0x28]
+ vpcmpgtb (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb]
+ vpcmpgtw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x65,0x28]
+ vpcmpgtw (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb]
+ vpcmpgtd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x66,0x28]
+ vpcmpgtd (%rax), %xmm12, %xmm13
+
+// CHECK: vpacksswb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb]
+ vpacksswb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpacksswb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x63,0x28]
+ vpacksswb (%rax), %xmm12, %xmm13
+
+// CHECK: vpackssdw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb]
+ vpackssdw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackssdw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6b,0x28]
+ vpackssdw (%rax), %xmm12, %xmm13
+
+// CHECK: vpackuswb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb]
+ vpackuswb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpackuswb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x67,0x28]
+ vpackuswb (%rax), %xmm12, %xmm13
+
+// CHECK: vpshufd $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04]
+ vpshufd $4, %xmm12, %xmm13
+
+// CHECK: vpshufd $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04]
+ vpshufd $4, (%rax), %xmm13
+
+// CHECK: vpshufhw $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04]
+ vpshufhw $4, %xmm12, %xmm13
+
+// CHECK: vpshufhw $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04]
+ vpshufhw $4, (%rax), %xmm13
+
+// CHECK: vpshuflw $4, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04]
+ vpshuflw $4, %xmm12, %xmm13
+
+// CHECK: vpshuflw $4, (%rax), %xmm13
+// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04]
+ vpshuflw $4, (%rax), %xmm13
+
+// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb]
+ vpunpcklbw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x60,0x28]
+ vpunpcklbw (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb]
+ vpunpcklwd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x61,0x28]
+ vpunpcklwd (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb]
+ vpunpckldq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckldq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x62,0x28]
+ vpunpckldq (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb]
+ vpunpcklqdq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6c,0x28]
+ vpunpcklqdq (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb]
+ vpunpckhbw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x68,0x28]
+ vpunpckhbw (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb]
+ vpunpckhwd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x69,0x28]
+ vpunpckhwd (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb]
+ vpunpckhdq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6a,0x28]
+ vpunpckhdq (%rax), %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb]
+ vpunpckhqdq %xmm11, %xmm12, %xmm13
+
+// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x6d,0x28]
+ vpunpckhqdq (%rax), %xmm12, %xmm13
+
+// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07]
+ vpinsrw $7, %eax, %xmm12, %xmm13
+
+// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07]
+ vpinsrw $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vpextrw $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
+ vpextrw $7, %xmm12, %eax
+
+// CHECK: vpmovmskb %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4]
+ vpmovmskb %xmm12, %eax
+
+// CHECK: vmaskmovdqu %xmm14, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
+ vmaskmovdqu %xmm14, %xmm15
+
+// CHECK: vmovd %eax, %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
+ vmovd %eax, %xmm14
+
+// CHECK: vmovd (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
+ vmovd (%rax), %xmm14
+
+// CHECK: vmovd %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
+ vmovd %xmm14, (%rax)
+
+// CHECK: vmovd %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+ vmovd %rax, %xmm14
+
+// CHECK: vmovq %xmm14, (%rax)
+// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
+ vmovq %xmm14, (%rax)
+
+// CHECK: vmovq %xmm14, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
+ vmovq %xmm14, %xmm12
+
+// CHECK: vmovq (%rax), %xmm14
+// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
+ vmovq (%rax), %xmm14
+
+// CHECK: vmovq %rax, %xmm14
+// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
+ vmovq %rax, %xmm14
+
+// CHECK: vmovq %xmm14, %rax
+// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
+ vmovq %xmm14, %rax
+
+// CHECK: vcvtpd2dq %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3]
+ vcvtpd2dq %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3]
+ vcvtdq2pd %xmm11, %xmm12
+
+// CHECK: vcvtdq2pd (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0xe6,0x20]
+ vcvtdq2pd (%rax), %xmm12
+
+// CHECK: vmovshdup %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3]
+ vmovshdup %xmm11, %xmm12
+
+// CHECK: vmovshdup (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x16,0x20]
+ vmovshdup (%rax), %xmm12
+
+// CHECK: vmovsldup %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3]
+ vmovsldup %xmm11, %xmm12
+
+// CHECK: vmovsldup (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7a,0x12,0x20]
+ vmovsldup (%rax), %xmm12
+
+// CHECK: vmovddup %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3]
+ vmovddup %xmm11, %xmm12
+
+// CHECK: vmovddup (%rax), %xmm12
+// CHECK: encoding: [0xc5,0x7b,0x12,0x20]
+ vmovddup (%rax), %xmm12
+
+// CHECK: vaddsubps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb]
+ vaddsubps %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubps (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x23,0xd0,0x20]
+ vaddsubps (%rax), %xmm11, %xmm12
+
+// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb]
+ vaddsubpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vaddsubpd (%rax), %xmm11, %xmm12
+// CHECK: encoding: [0xc5,0x21,0xd0,0x20]
+ vaddsubpd (%rax), %xmm11, %xmm12
+
+// CHECK: vhaddps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb]
+ vhaddps %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7c,0x28]
+ vhaddps (%rax), %xmm12, %xmm13
+
+// CHECK: vhaddpd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb]
+ vhaddpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vhaddpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7c,0x28]
+ vhaddpd (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubps %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb]
+ vhsubps %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubps (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0x7d,0x28]
+ vhsubps (%rax), %xmm12, %xmm13
+
+// CHECK: vhsubpd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb]
+ vhsubpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vhsubpd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
+ vhsubpd (%rax), %xmm12, %xmm13
+
+// CHECK: vpabsb %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3]
+ vpabsb %xmm11, %xmm12
+
+// CHECK: vpabsb (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20]
+ vpabsb (%rax), %xmm12
+
+// CHECK: vpabsw %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3]
+ vpabsw %xmm11, %xmm12
+
+// CHECK: vpabsw (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20]
+ vpabsw (%rax), %xmm12
+
+// CHECK: vpabsd %xmm11, %xmm12
+// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3]
+ vpabsd %xmm11, %xmm12
+
+// CHECK: vpabsd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20]
+ vpabsd (%rax), %xmm12
+
+// CHECK: vphaddw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb]
+ vphaddw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28]
+ vphaddw (%rax), %xmm12, %xmm13
+
+// CHECK: vphaddd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb]
+ vphaddd %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28]
+ vphaddd (%rax), %xmm12, %xmm13
+
+// CHECK: vphaddsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb]
+ vphaddsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphaddsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28]
+ vphaddsw (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb]
+ vphsubw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28]
+ vphsubw (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb]
+ vphsubd %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28]
+ vphsubd (%rax), %xmm12, %xmm13
+
+// CHECK: vphsubsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb]
+ vphsubsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vphsubsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28]
+ vphsubsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb]
+ vpmaddubsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28]
+ vpmaddubsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpshufb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb]
+ vpshufb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpshufb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28]
+ vpshufb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignb %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb]
+ vpsignb %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28]
+ vpsignb (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb]
+ vpsignw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28]
+ vpsignw (%rax), %xmm12, %xmm13
+
+// CHECK: vpsignd %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb]
+ vpsignd %xmm11, %xmm12, %xmm13
+
+// CHECK: vpsignd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28]
+ vpsignd (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb]
+ vpmulhrsw %xmm11, %xmm12, %xmm13
+
+// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28]
+ vpmulhrsw (%rax), %xmm12, %xmm13
+
+// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07]
+ vpalignr $7, %xmm11, %xmm12, %xmm13
+
+// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07]
+ vpalignr $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07]
+ vroundsd $7, %xmm11, %xmm12, %xmm13
+
+// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07]
+ vroundsd $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07]
+ vroundss $7, %xmm11, %xmm12, %xmm13
+
+// CHECK: vroundss $7, (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07]
+ vroundss $7, (%rax), %xmm12, %xmm13
+
+// CHECK: vroundpd $7, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07]
+ vroundpd $7, %xmm12, %xmm13
+
+// CHECK: vroundpd $7, (%rax), %xmm13
+// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07]
+ vroundpd $7, (%rax), %xmm13
+
+// CHECK: vroundps $7, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07]
+ vroundps $7, %xmm12, %xmm13
+
+// CHECK: vroundps $7, (%rax), %xmm13
+// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07]
+ vroundps $7, (%rax), %xmm13
+
+// CHECK: vphminposuw %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec]
+ vphminposuw %xmm12, %xmm13
+
+// CHECK: vphminposuw (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20]
+ vphminposuw (%rax), %xmm12
+
+// CHECK: vpackusdw %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc]
+ vpackusdw %xmm12, %xmm13, %xmm11
+
+// CHECK: vpackusdw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28]
+ vpackusdw (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc]
+ vpcmpeqq %xmm12, %xmm13, %xmm11
+
+// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28]
+ vpcmpeqq (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsb %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc]
+ vpminsb %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminsb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28]
+ vpminsb (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsd %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc]
+ vpminsd %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminsd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28]
+ vpminsd (%rax), %xmm12, %xmm13
+
+// CHECK: vpminud %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc]
+ vpminud %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminud (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28]
+ vpminud (%rax), %xmm12, %xmm13
+
+// CHECK: vpminuw %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc]
+ vpminuw %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminuw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28]
+ vpminuw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc]
+ vpmaxsb %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxsb (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28]
+ vpmaxsb (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc]
+ vpmaxsd %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxsd (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28]
+ vpmaxsd (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxud %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc]
+ vpmaxud %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxud (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28]
+ vpmaxud (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc]
+ vpmaxuw %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxuw (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28]
+ vpmaxuw (%rax), %xmm12, %xmm13
+
+// CHECK: vpmuldq %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc]
+ vpmuldq %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmuldq (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28]
+ vpmuldq (%rax), %xmm12, %xmm13
+
+// CHECK: vpmulld %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc]
+ vpmulld %xmm12, %xmm5, %xmm11
+
+// CHECK: vpmulld (%rax), %xmm5, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28]
+ vpmulld (%rax), %xmm5, %xmm13
+
+// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03]
+ vblendps $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vblendps $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03]
+ vblendps $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03]
+ vblendpd $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03]
+ vblendpd $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03]
+ vpblendw $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03]
+ vpblendw $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03]
+ vmpsadbw $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03]
+ vmpsadbw $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03]
+ vdpps $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vdpps $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03]
+ vdpps $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03]
+ vdppd $3, %xmm12, %xmm5, %xmm11
+
+// CHECK: vdppd $3, (%rax), %xmm5, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03]
+ vdppd $3, (%rax), %xmm5, %xmm11
+
+// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0]
+ vblendvpd %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0]
+ vblendvpd %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0]
+ vblendvps %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vblendvps %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0]
+ vblendvps %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0]
+ vpblendvb %xmm12, %xmm5, %xmm11, %xmm13
+
+// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13
+// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0]
+ vpblendvb %xmm12, (%rax), %xmm11, %xmm13
+
+// CHECK: vpmovsxbw %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4]
+ vpmovsxbw %xmm12, %xmm10
+
+// CHECK: vpmovsxbw (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20]
+ vpmovsxbw (%rax), %xmm12
+
+// CHECK: vpmovsxwd %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4]
+ vpmovsxwd %xmm12, %xmm10
+
+// CHECK: vpmovsxwd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20]
+ vpmovsxwd (%rax), %xmm12
+
+// CHECK: vpmovsxdq %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4]
+ vpmovsxdq %xmm12, %xmm10
+
+// CHECK: vpmovsxdq (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20]
+ vpmovsxdq (%rax), %xmm12
+
+// CHECK: vpmovzxbw %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4]
+ vpmovzxbw %xmm12, %xmm10
+
+// CHECK: vpmovzxbw (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20]
+ vpmovzxbw (%rax), %xmm12
+
+// CHECK: vpmovzxwd %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4]
+ vpmovzxwd %xmm12, %xmm10
+
+// CHECK: vpmovzxwd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20]
+ vpmovzxwd (%rax), %xmm12
+
+// CHECK: vpmovzxdq %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4]
+ vpmovzxdq %xmm12, %xmm10
+
+// CHECK: vpmovzxdq (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20]
+ vpmovzxdq (%rax), %xmm12
+
+// CHECK: vpmovsxbq %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4]
+ vpmovsxbq %xmm12, %xmm10
+
+// CHECK: vpmovsxbq (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20]
+ vpmovsxbq (%rax), %xmm12
+
+// CHECK: vpmovzxbq %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4]
+ vpmovzxbq %xmm12, %xmm10
+
+// CHECK: vpmovzxbq (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20]
+ vpmovzxbq (%rax), %xmm12
+
+// CHECK: vpmovsxbd %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4]
+ vpmovsxbd %xmm12, %xmm10
+
+// CHECK: vpmovsxbd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20]
+ vpmovsxbd (%rax), %xmm12
+
+// CHECK: vpmovsxwq %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4]
+ vpmovsxwq %xmm12, %xmm10
+
+// CHECK: vpmovsxwq (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20]
+ vpmovsxwq (%rax), %xmm12
+
+// CHECK: vpmovzxbd %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4]
+ vpmovzxbd %xmm12, %xmm10
+
+// CHECK: vpmovzxbd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20]
+ vpmovzxbd (%rax), %xmm12
+
+// CHECK: vpmovzxwq %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4]
+ vpmovzxwq %xmm12, %xmm10
+
+// CHECK: vpmovzxwq (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20]
+ vpmovzxwq (%rax), %xmm12
+
+// CHECK: vpextrw $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
+ vpextrw $7, %xmm12, %eax
+
+// CHECK: vpextrw $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07]
+ vpextrw $7, %xmm12, (%rax)
+
+// CHECK: vpextrd $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07]
+ vpextrd $7, %xmm12, %eax
+
+// CHECK: vpextrd $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07]
+ vpextrd $7, %xmm12, (%rax)
+
+// CHECK: vpextrb $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07]
+ vpextrb $7, %xmm12, %eax
+
+// CHECK: vpextrb $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07]
+ vpextrb $7, %xmm12, (%rax)
+
+// CHECK: vpextrq $7, %xmm12, %rcx
+// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07]
+ vpextrq $7, %xmm12, %rcx
+
+// CHECK: vpextrq $7, %xmm12, (%rcx)
+// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07]
+ vpextrq $7, %xmm12, (%rcx)
+
+// CHECK: vextractps $7, %xmm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07]
+ vextractps $7, %xmm12, (%rax)
+
+// CHECK: vextractps $7, %xmm12, %eax
+// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07]
+ vextractps $7, %xmm12, %eax
+
+// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07]
+ vpinsrw $7, %eax, %xmm12, %xmm10
+
+// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07]
+ vpinsrw $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07]
+ vpinsrb $7, %eax, %xmm12, %xmm10
+
+// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07]
+ vpinsrb $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07]
+ vpinsrd $7, %eax, %xmm12, %xmm10
+
+// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07]
+ vpinsrd $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07]
+ vpinsrq $7, %rax, %xmm12, %xmm10
+
+// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07]
+ vpinsrq $7, (%rax), %xmm12, %xmm10
+
+// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07]
+ vinsertps $7, %xmm12, %xmm10, %xmm11
+
+// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07]
+ vinsertps $7, (%rax), %xmm10, %xmm11
+
+// CHECK: vptest %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4]
+ vptest %xmm12, %xmm10
+
+// CHECK: vptest (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20]
+ vptest (%rax), %xmm12
+
+// CHECK: vmovntdqa (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20]
+ vmovntdqa (%rax), %xmm12
+
+// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc]
+ vpcmpgtq %xmm12, %xmm10, %xmm11
+
+// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28]
+ vpcmpgtq (%rax), %xmm10, %xmm13
+
+// CHECK: vpcmpistrm $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07]
+ vpcmpistrm $7, %xmm12, %xmm10
+
+// CHECK: vpcmpistrm $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07]
+ vpcmpistrm $7, (%rax), %xmm10
+
+// CHECK: vpcmpestrm $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07]
+ vpcmpestrm $7, %xmm12, %xmm10
+
+// CHECK: vpcmpestrm $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07]
+ vpcmpestrm $7, (%rax), %xmm10
+
+// CHECK: vpcmpistri $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07]
+ vpcmpistri $7, %xmm12, %xmm10
+
+// CHECK: vpcmpistri $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07]
+ vpcmpistri $7, (%rax), %xmm10
+
+// CHECK: vpcmpestri $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07]
+ vpcmpestri $7, %xmm12, %xmm10
+
+// CHECK: vpcmpestri $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07]
+ vpcmpestri $7, (%rax), %xmm10
+
+// CHECK: vaesimc %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4]
+ vaesimc %xmm12, %xmm10
+
+// CHECK: vaesimc (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20]
+ vaesimc (%rax), %xmm12
+
+// CHECK: vaesenc %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc]
+ vaesenc %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesenc (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28]
+ vaesenc (%rax), %xmm10, %xmm13
+
+// CHECK: vaesenclast %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc]
+ vaesenclast %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesenclast (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28]
+ vaesenclast (%rax), %xmm10, %xmm13
+
+// CHECK: vaesdec %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc]
+ vaesdec %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesdec (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28]
+ vaesdec (%rax), %xmm10, %xmm13
+
+// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc]
+ vaesdeclast %xmm12, %xmm10, %xmm11
+
+// CHECK: vaesdeclast (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28]
+ vaesdeclast (%rax), %xmm10, %xmm13
+
+// CHECK: vaeskeygenassist $7, %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07]
+ vaeskeygenassist $7, %xmm12, %xmm10
+
+// CHECK: vaeskeygenassist $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07]
+ vaeskeygenassist $7, (%rax), %xmm10
+
+// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08]
+ vcmpeq_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09]
+ vcmpngeps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a]
+ vcmpngtps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b]
+ vcmpfalseps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c]
+ vcmpneq_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d]
+ vcmpgeps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e]
+ vcmpgtps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f]
+ vcmptrueps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10]
+ vcmpeq_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11]
+ vcmplt_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12]
+ vcmple_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13]
+ vcmpunord_sps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14]
+ vcmpneq_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15]
+ vcmpnlt_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16]
+ vcmpnle_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17]
+ vcmpord_sps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18]
+ vcmpeq_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19]
+ vcmpnge_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a]
+ vcmpngt_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b]
+ vcmpfalse_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c]
+ vcmpneq_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d]
+ vcmpge_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e]
+ vcmpgt_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f]
+ vcmptrue_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vmovaps (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x28,0x20]
+ vmovaps (%rax), %ymm12
+
+// CHECK: vmovaps %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3]
+ vmovaps %ymm11, %ymm12
+
+// CHECK: vmovaps %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7c,0x29,0x18]
+ vmovaps %ymm11, (%rax)
+
+// CHECK: vmovapd (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x28,0x20]
+ vmovapd (%rax), %ymm12
+
+// CHECK: vmovapd %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3]
+ vmovapd %ymm11, %ymm12
+
+// CHECK: vmovapd %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x29,0x18]
+ vmovapd %ymm11, (%rax)
+
+// CHECK: vmovups (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x10,0x20]
+ vmovups (%rax), %ymm12
+
+// CHECK: vmovups %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3]
+ vmovups %ymm11, %ymm12
+
+// CHECK: vmovups %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7c,0x11,0x18]
+ vmovups %ymm11, (%rax)
+
+// CHECK: vmovupd (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x10,0x20]
+ vmovupd (%rax), %ymm12
+
+// CHECK: vmovupd %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3]
+ vmovupd %ymm11, %ymm12
+
+// CHECK: vmovupd %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x11,0x18]
+ vmovupd %ymm11, (%rax)
+
+// CHECK: vunpckhps %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3]
+ vunpckhps %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3]
+ vunpckhpd %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpcklps %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3]
+ vunpcklps %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4
+// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3]
+ vunpcklpd %ymm11, %ymm12, %ymm4
+
+// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc]
+ vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc]
+ vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc]
+ vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc]
+ vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vmovntdq %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0xe7,0x18]
+ vmovntdq %ymm11, (%rax)
+
+// CHECK: vmovntpd %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x2b,0x18]
+ vmovntpd %ymm11, (%rax)
+
+// CHECK: vmovntps %ymm11, (%rax)
+// CHECK: encoding: [0xc5,0x7c,0x2b,0x18]
+ vmovntps %ymm11, (%rax)
+
+// CHECK: vmovmskps %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4]
+ vmovmskps %xmm12, %eax
+
+// CHECK: vmovmskpd %xmm12, %eax
+// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4]
+ vmovmskpd %xmm12, %eax
+
+// CHECK: vmaxps %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4]
+ vmaxps %ymm12, %ymm4, %ymm6
+
+// CHECK: vmaxpd %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4]
+ vmaxpd %ymm12, %ymm4, %ymm6
+
+// CHECK: vminps %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4]
+ vminps %ymm12, %ymm4, %ymm6
+
+// CHECK: vminpd %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4]
+ vminpd %ymm12, %ymm4, %ymm6
+
+// CHECK: vsubps %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4]
+ vsubps %ymm12, %ymm4, %ymm6
+
+// CHECK: vsubpd %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4]
+ vsubpd %ymm12, %ymm4, %ymm6
+
+// CHECK: vdivps %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4]
+ vdivps %ymm12, %ymm4, %ymm6
+
+// CHECK: vdivpd %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4]
+ vdivpd %ymm12, %ymm4, %ymm6
+
+// CHECK: vaddps %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
+ vaddps %ymm12, %ymm4, %ymm6
+
+// CHECK: vaddpd %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4]
+ vaddpd %ymm12, %ymm4, %ymm6
+
+// CHECK: vmulps %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4]
+ vmulps %ymm12, %ymm4, %ymm6
+
+// CHECK: vmulpd %ymm12, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4]
+ vmulpd %ymm12, %ymm4, %ymm6
+
+// CHECK: vmaxps (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
+ vmaxps (%rax), %ymm4, %ymm6
+
+// CHECK: vmaxpd (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
+ vmaxpd (%rax), %ymm4, %ymm6
+
+// CHECK: vminps (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
+ vminps (%rax), %ymm4, %ymm6
+
+// CHECK: vminpd (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
+ vminpd (%rax), %ymm4, %ymm6
+
+// CHECK: vsubps (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
+ vsubps (%rax), %ymm4, %ymm6
+
+// CHECK: vsubpd (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
+ vsubpd (%rax), %ymm4, %ymm6
+
+// CHECK: vdivps (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
+ vdivps (%rax), %ymm4, %ymm6
+
+// CHECK: vdivpd (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
+ vdivpd (%rax), %ymm4, %ymm6
+
+// CHECK: vaddps (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
+ vaddps (%rax), %ymm4, %ymm6
+
+// CHECK: vaddpd (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
+ vaddpd (%rax), %ymm4, %ymm6
+
+// CHECK: vmulps (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
+ vmulps (%rax), %ymm4, %ymm6
+
+// CHECK: vmulpd (%rax), %ymm4, %ymm6
+// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
+ vmulpd (%rax), %ymm4, %ymm6
+
+// CHECK: vsqrtpd %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3]
+ vsqrtpd %ymm11, %ymm12
+
+// CHECK: vsqrtpd (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x51,0x20]
+ vsqrtpd (%rax), %ymm12
+
+// CHECK: vsqrtps %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3]
+ vsqrtps %ymm11, %ymm12
+
+// CHECK: vsqrtps (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x51,0x20]
+ vsqrtps (%rax), %ymm12
+
+// CHECK: vrsqrtps %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3]
+ vrsqrtps %ymm11, %ymm12
+
+// CHECK: vrsqrtps (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x52,0x20]
+ vrsqrtps (%rax), %ymm12
+
+// CHECK: vrcpps %ymm11, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3]
+ vrcpps %ymm11, %ymm12
+
+// CHECK: vrcpps (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x53,0x20]
+ vrcpps (%rax), %ymm12
+
+// CHECK: vandps %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc]
+ vandps %ymm12, %ymm14, %ymm11
+
+// CHECK: vandpd %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc]
+ vandpd %ymm12, %ymm14, %ymm11
+
+// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc]
+ vandps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc]
+ vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vorps %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc]
+ vorps %ymm12, %ymm14, %ymm11
+
+// CHECK: vorpd %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc]
+ vorpd %ymm12, %ymm14, %ymm11
+
+// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc]
+ vorps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc]
+ vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vxorps %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc]
+ vxorps %ymm12, %ymm14, %ymm11
+
+// CHECK: vxorpd %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc]
+ vxorpd %ymm12, %ymm14, %ymm11
+
+// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc]
+ vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc]
+ vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vandnps %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc]
+ vandnps %ymm12, %ymm14, %ymm11
+
+// CHECK: vandnpd %ymm12, %ymm14, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc]
+ vandnpd %ymm12, %ymm14, %ymm11
+
+// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc]
+ vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc]
+ vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10
+
+// CHECK: vcvtps2pd %xmm13, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5]
+ vcvtps2pd %xmm13, %ymm12
+
+// CHECK: vcvtps2pd (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x5a,0x20]
+ vcvtps2pd (%rax), %ymm12
+
+// CHECK: vcvtdq2pd %xmm13, %ymm12
+// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5]
+ vcvtdq2pd %xmm13, %ymm12
+
+// CHECK: vcvtdq2pd (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0xe6,0x20]
+ vcvtdq2pd (%rax), %ymm12
+
+// CHECK: vcvtdq2ps %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4]
+ vcvtdq2ps %ymm12, %ymm10
+
+// CHECK: vcvtdq2ps (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7c,0x5b,0x20]
+ vcvtdq2ps (%rax), %ymm12
+
+// CHECK: vcvtps2dq %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4]
+ vcvtps2dq %ymm12, %ymm10
+
+// CHECK: vcvtps2dq (%rax), %ymm10
+// CHECK: encoding: [0xc5,0x7d,0x5b,0x10]
+ vcvtps2dq (%rax), %ymm10
+
+// CHECK: vcvttps2dq %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4]
+ vcvttps2dq %ymm12, %ymm10
+
+// CHECK: vcvttps2dq (%rax), %ymm10
+// CHECK: encoding: [0xc5,0x7e,0x5b,0x10]
+ vcvttps2dq (%rax), %ymm10
+
+// CHECK: vcvttpd2dq %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
+ vcvttpd2dq %xmm11, %xmm10
+
+// CHECK: vcvttpd2dq %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4]
+ vcvttpd2dq %ymm12, %xmm10
+
+// CHECK: vcvttpd2dqx %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
+ vcvttpd2dqx %xmm11, %xmm10
+
+// CHECK: vcvttpd2dqx (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0xe6,0x18]
+ vcvttpd2dqx (%rax), %xmm11
+
+// CHECK: vcvttpd2dqy %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc]
+ vcvttpd2dqy %ymm12, %xmm11
+
+// CHECK: vcvttpd2dqy (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7d,0xe6,0x18]
+ vcvttpd2dqy (%rax), %xmm11
+
+// CHECK: vcvtpd2ps %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4]
+ vcvtpd2ps %ymm12, %xmm10
+
+// CHECK: vcvtpd2psx %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3]
+ vcvtpd2psx %xmm11, %xmm10
+
+// CHECK: vcvtpd2psx (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x79,0x5a,0x18]
+ vcvtpd2psx (%rax), %xmm11
+
+// CHECK: vcvtpd2psy %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc]
+ vcvtpd2psy %ymm12, %xmm11
+
+// CHECK: vcvtpd2psy (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7d,0x5a,0x18]
+ vcvtpd2psy (%rax), %xmm11
+
+// CHECK: vcvtpd2dq %ymm12, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4]
+ vcvtpd2dq %ymm12, %xmm10
+
+// CHECK: vcvtpd2dqy %ymm12, %xmm11
+// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc]
+ vcvtpd2dqy %ymm12, %xmm11
+
+// CHECK: vcvtpd2dqy (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7f,0xe6,0x18]
+ vcvtpd2dqy (%rax), %xmm11
+
+// CHECK: vcvtpd2dqx %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3]
+ vcvtpd2dqx %xmm11, %xmm10
+
+// CHECK: vcvtpd2dqx (%rax), %xmm11
+// CHECK: encoding: [0xc5,0x7b,0xe6,0x18]
+ vcvtpd2dqx (%rax), %xmm11
+
+// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00]
+ vcmpeqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02]
+ vcmpleps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01]
+ vcmpltps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04]
+ vcmpneqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06]
+ vcmpnleps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05]
+ vcmpnltps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07]
+ vcmpordps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03]
+ vcmpunordps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
+// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07]
+ vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12
+
+// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00]
+ vcmpeqpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02]
+ vcmplepd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01]
+ vcmpltpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04]
+ vcmpneqpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06]
+ vcmpnlepd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05]
+ vcmpnltpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07]
+ vcmpordpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03]
+ vcmpunordpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00]
+ vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02]
+ vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01]
+ vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04]
+ vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06]
+ vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05]
+ vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
+// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07]
+ vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12
+
+// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03]
+ vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13
+
+// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08]
+ vcmpeq_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09]
+ vcmpngeps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a]
+ vcmpngtps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b]
+ vcmpfalseps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c]
+ vcmpneq_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d]
+ vcmpgeps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $14, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e]
+ vcmpgtps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f]
+ vcmptrueps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10]
+ vcmpeq_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11]
+ vcmplt_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12]
+ vcmple_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13]
+ vcmpunord_sps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14]
+ vcmpneq_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15]
+ vcmpnlt_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16]
+ vcmpnle_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17]
+ vcmpord_sps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18]
+ vcmpeq_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19]
+ vcmpnge_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a]
+ vcmpngt_uqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b]
+ vcmpfalse_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c]
+ vcmpneq_osps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d]
+ vcmpge_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $30, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e]
+ vcmpgt_oqps %ymm11, %ymm12, %ymm13
+
+// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f]
+ vcmptrue_usps %ymm11, %ymm12, %ymm13
+
+// CHECK: vaddsubps %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1f,0xd0,0xeb]
+ vaddsubps %ymm11, %ymm12, %ymm13
+
+// CHECK: vaddsubps (%rax), %ymm11, %ymm12
+// CHECK: encoding: [0xc5,0x27,0xd0,0x20]
+ vaddsubps (%rax), %ymm11, %ymm12
+
+// CHECK: vaddsubpd %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0xd0,0xeb]
+ vaddsubpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vaddsubpd (%rax), %ymm11, %ymm12
+// CHECK: encoding: [0xc5,0x25,0xd0,0x20]
+ vaddsubpd (%rax), %ymm11, %ymm12
+
+// CHECK: vhaddps %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1f,0x7c,0xeb]
+ vhaddps %ymm11, %ymm12, %ymm13
+
+// CHECK: vhaddps (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1f,0x7c,0x28]
+ vhaddps (%rax), %ymm12, %ymm13
+
+// CHECK: vhaddpd %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0x7c,0xeb]
+ vhaddpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vhaddpd (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0x7c,0x28]
+ vhaddpd (%rax), %ymm12, %ymm13
+
+// CHECK: vhsubps %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1f,0x7d,0xeb]
+ vhsubps %ymm11, %ymm12, %ymm13
+
+// CHECK: vhsubps (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1f,0x7d,0x28]
+ vhsubps (%rax), %ymm12, %ymm13
+
+// CHECK: vhsubpd %ymm11, %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x41,0x1d,0x7d,0xeb]
+ vhsubpd %ymm11, %ymm12, %ymm13
+
+// CHECK: vhsubpd (%rax), %ymm12, %ymm13
+// CHECK: encoding: [0xc5,0x1d,0x7d,0x28]
+ vhsubpd (%rax), %ymm12, %ymm13
+
+// CHECK: vblendps $3, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x0c,0xdc,0x03]
+ vblendps $3, %ymm12, %ymm10, %ymm11
+
+// CHECK: vblendps $3, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x0c,0x18,0x03]
+ vblendps $3, (%rax), %ymm10, %ymm11
+
+// CHECK: vblendpd $3, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x0d,0xdc,0x03]
+ vblendpd $3, %ymm12, %ymm10, %ymm11
+
+// CHECK: vblendpd $3, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x0d,0x18,0x03]
+ vblendpd $3, (%rax), %ymm10, %ymm11
+
+// CHECK: vdpps $3, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x40,0xdc,0x03]
+ vdpps $3, %ymm12, %ymm10, %ymm11
+
+// CHECK: vdpps $3, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x40,0x18,0x03]
+ vdpps $3, (%rax), %ymm10, %ymm11
+
+// CHECK: vbroadcastf128 (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x1a,0x20]
+ vbroadcastf128 (%rax), %ymm12
+
+// CHECK: vbroadcastsd (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x19,0x20]
+ vbroadcastsd (%rax), %ymm12
+
+// CHECK: vbroadcastss (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x18,0x20]
+ vbroadcastss (%rax), %xmm12
+
+// CHECK: vbroadcastss (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x18,0x20]
+ vbroadcastss (%rax), %ymm12
+
+// CHECK: vinsertf128 $7, %xmm12, %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x43,0x1d,0x18,0xd4,0x07]
+ vinsertf128 $7, %xmm12, %ymm12, %ymm10
+
+// CHECK: vinsertf128 $7, (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x63,0x1d,0x18,0x10,0x07]
+ vinsertf128 $7, (%rax), %ymm12, %ymm10
+
+// CHECK: vextractf128 $7, %ymm12, %xmm12
+// CHECK: encoding: [0xc4,0x43,0x7d,0x19,0xe4,0x07]
+ vextractf128 $7, %ymm12, %xmm12
+
+// CHECK: vextractf128 $7, %ymm12, (%rax)
+// CHECK: encoding: [0xc4,0x63,0x7d,0x19,0x20,0x07]
+ vextractf128 $7, %ymm12, (%rax)
+
+// CHECK: vmaskmovpd %xmm12, %xmm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x29,0x2f,0x20]
+ vmaskmovpd %xmm12, %xmm10, (%rax)
+
+// CHECK: vmaskmovpd %ymm12, %ymm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x2d,0x2f,0x20]
+ vmaskmovpd %ymm12, %ymm10, (%rax)
+
+// CHECK: vmaskmovpd (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x62,0x19,0x2d,0x10]
+ vmaskmovpd (%rax), %xmm12, %xmm10
+
+// CHECK: vmaskmovpd (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x62,0x1d,0x2d,0x10]
+ vmaskmovpd (%rax), %ymm12, %ymm10
+
+// CHECK: vmaskmovps %xmm12, %xmm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x29,0x2e,0x20]
+ vmaskmovps %xmm12, %xmm10, (%rax)
+
+// CHECK: vmaskmovps %ymm12, %ymm10, (%rax)
+// CHECK: encoding: [0xc4,0x62,0x2d,0x2e,0x20]
+ vmaskmovps %ymm12, %ymm10, (%rax)
+
+// CHECK: vmaskmovps (%rax), %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x62,0x19,0x2c,0x10]
+ vmaskmovps (%rax), %xmm12, %xmm10
+
+// CHECK: vmaskmovps (%rax), %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x62,0x1d,0x2c,0x10]
+ vmaskmovps (%rax), %ymm12, %ymm10
+
+// CHECK: vpermilps $7, %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x04,0xd3,0x07]
+ vpermilps $7, %xmm11, %xmm10
+
+// CHECK: vpermilps $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x04,0xda,0x07]
+ vpermilps $7, %ymm10, %ymm11
+
+// CHECK: vpermilps $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x04,0x10,0x07]
+ vpermilps $7, (%rax), %xmm10
+
+// CHECK: vpermilps $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x04,0x10,0x07]
+ vpermilps $7, (%rax), %ymm10
+
+// CHECK: vpermilps %xmm11, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x0c,0xdb]
+ vpermilps %xmm11, %xmm10, %xmm11
+
+// CHECK: vpermilps %ymm11, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x0c,0xdb]
+ vpermilps %ymm11, %ymm10, %ymm11
+
+// CHECK: vpermilps (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0x0c,0x28]
+ vpermilps (%rax), %xmm10, %xmm13
+
+// CHECK: vpermilps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x0c,0x18]
+ vpermilps (%rax), %ymm10, %ymm11
+
+// CHECK: vpermilpd $7, %xmm11, %xmm10
+// CHECK: encoding: [0xc4,0x43,0x79,0x05,0xd3,0x07]
+ vpermilpd $7, %xmm11, %xmm10
+
+// CHECK: vpermilpd $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x05,0xda,0x07]
+ vpermilpd $7, %ymm10, %ymm11
+
+// CHECK: vpermilpd $7, (%rax), %xmm10
+// CHECK: encoding: [0xc4,0x63,0x79,0x05,0x10,0x07]
+ vpermilpd $7, (%rax), %xmm10
+
+// CHECK: vpermilpd $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x05,0x10,0x07]
+ vpermilpd $7, (%rax), %ymm10
+
+// CHECK: vpermilpd %xmm11, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x0d,0xdb]
+ vpermilpd %xmm11, %xmm10, %xmm11
+
+// CHECK: vpermilpd %ymm11, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x0d,0xdb]
+ vpermilpd %ymm11, %ymm10, %ymm11
+
+// CHECK: vpermilpd (%rax), %xmm10, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x29,0x0d,0x28]
+ vpermilpd (%rax), %xmm10, %xmm13
+
+// CHECK: vpermilpd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x0d,0x18]
+ vpermilpd (%rax), %ymm10, %ymm11
+
+// CHECK: vperm2f128 $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x2d,0x06,0xdc,0x07]
+ vperm2f128 $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vperm2f128 $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x63,0x2d,0x06,0x18,0x07]
+ vperm2f128 $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vcvtsd2si %xmm8, %r8d
+// CHECK: encoding: [0xc4,0x41,0x7b,0x2d,0xc0]
+ vcvtsd2si %xmm8, %r8d
+
+// CHECK: vcvtsd2si (%rcx), %ecx
+// CHECK: encoding: [0xc5,0xfb,0x2d,0x09]
+ vcvtsd2si (%rcx), %ecx
+
+// CHECK: vcvtss2si %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2d,0xcc]
+ vcvtss2si %xmm4, %rcx
+
+// CHECK: vcvtss2si (%rcx), %r8
+// CHECK: encoding: [0xc4,0x61,0xfa,0x2d,0x01]
+ vcvtss2si (%rcx), %r8
+
+// CHECK: vcvtsi2sdl %r8d, %xmm8, %xmm15
+// CHECK: encoding: [0xc4,0x41,0x3b,0x2a,0xf8]
+ vcvtsi2sdl %r8d, %xmm8, %xmm15
+
+// CHECK: vcvtsi2sdl (%rbp), %xmm8, %xmm15
+// CHECK: encoding: [0xc5,0x3b,0x2a,0x7d,0x00]
+ vcvtsi2sdl (%rbp), %xmm8, %xmm15
+
+// CHECK: vcvtsi2sdq %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0xf1]
+ vcvtsi2sdq %rcx, %xmm4, %xmm6
+
+// CHECK: vcvtsi2sdq (%rcx), %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xdb,0x2a,0x31]
+ vcvtsi2sdq (%rcx), %xmm4, %xmm6
+
+// CHECK: vcvtsi2ssq %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0xf1]
+ vcvtsi2ssq %rcx, %xmm4, %xmm6
+
+// CHECK: vcvtsi2ssq (%rcx), %xmm4, %xmm6
+// CHECK: encoding: [0xc4,0xe1,0xda,0x2a,0x31]
+ vcvtsi2ssq (%rcx), %xmm4, %xmm6
+
+// CHECK: vcvttsd2si %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0xcc]
+ vcvttsd2si %xmm4, %rcx
+
+// CHECK: vcvttsd2si (%rcx), %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfb,0x2c,0x09]
+ vcvttsd2si (%rcx), %rcx
+
+// CHECK: vcvttss2si %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0xcc]
+ vcvttss2si %xmm4, %rcx
+
+// CHECK: vcvttss2si (%rcx), %rcx
+// CHECK: encoding: [0xc4,0xe1,0xfa,0x2c,0x09]
+ vcvttss2si (%rcx), %rcx
+
+// CHECK: vlddqu (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7f,0xf0,0x20]
+ vlddqu (%rax), %ymm12
+
+// CHECK: vmovddup %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7f,0x12,0xd4]
+ vmovddup %ymm12, %ymm10
+
+// CHECK: vmovddup (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7f,0x12,0x20]
+ vmovddup (%rax), %ymm12
+
+// CHECK: vmovdqa %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7d,0x6f,0xd4]
+ vmovdqa %ymm12, %ymm10
+
+// CHECK: vmovdqa %ymm12, (%rax)
+// CHECK: encoding: [0xc5,0x7d,0x7f,0x20]
+ vmovdqa %ymm12, (%rax)
+
+// CHECK: vmovdqa (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7d,0x6f,0x20]
+ vmovdqa (%rax), %ymm12
+
+// CHECK: vmovdqu %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x6f,0xd4]
+ vmovdqu %ymm12, %ymm10
+
+// CHECK: vmovdqu %ymm12, (%rax)
+// CHECK: encoding: [0xc5,0x7e,0x7f,0x20]
+ vmovdqu %ymm12, (%rax)
+
+// CHECK: vmovdqu (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x6f,0x20]
+ vmovdqu (%rax), %ymm12
+
+// CHECK: vmovshdup %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x16,0xd4]
+ vmovshdup %ymm12, %ymm10
+
+// CHECK: vmovshdup (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x16,0x20]
+ vmovshdup (%rax), %ymm12
+
+// CHECK: vmovsldup %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x41,0x7e,0x12,0xd4]
+ vmovsldup %ymm12, %ymm10
+
+// CHECK: vmovsldup (%rax), %ymm12
+// CHECK: encoding: [0xc5,0x7e,0x12,0x20]
+ vmovsldup (%rax), %ymm12
+
+// CHECK: vptest %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x17,0xd4]
+ vptest %ymm12, %ymm10
+
+// CHECK: vptest (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x17,0x20]
+ vptest (%rax), %ymm12
+
+// CHECK: vroundpd $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x09,0xda,0x07]
+ vroundpd $7, %ymm10, %ymm11
+
+// CHECK: vroundpd $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x09,0x10,0x07]
+ vroundpd $7, (%rax), %ymm10
+
+// CHECK: vroundps $7, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x43,0x7d,0x08,0xda,0x07]
+ vroundps $7, %ymm10, %ymm11
+
+// CHECK: vroundps $7, (%rax), %ymm10
+// CHECK: encoding: [0xc4,0x63,0x7d,0x08,0x10,0x07]
+ vroundps $7, (%rax), %ymm10
+
+// CHECK: vshufpd $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x2d,0xc6,0xdc,0x07]
+ vshufpd $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vshufpd $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc5,0x2d,0xc6,0x18,0x07]
+ vshufpd $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vshufps $7, %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x41,0x2c,0xc6,0xdc,0x07]
+ vshufps $7, %ymm12, %ymm10, %ymm11
+
+// CHECK: vshufps $7, (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc5,0x2c,0xc6,0x18,0x07]
+ vshufps $7, (%rax), %ymm10, %ymm11
+
+// CHECK: vtestpd %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x0f,0xd4]
+ vtestpd %xmm12, %xmm10
+
+// CHECK: vtestpd %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x0f,0xd4]
+ vtestpd %ymm12, %ymm10
+
+// CHECK: vtestpd (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x0f,0x20]
+ vtestpd (%rax), %xmm12
+
+// CHECK: vtestpd (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x0f,0x20]
+ vtestpd (%rax), %ymm12
+
+// CHECK: vtestps %xmm12, %xmm10
+// CHECK: encoding: [0xc4,0x42,0x79,0x0e,0xd4]
+ vtestps %xmm12, %xmm10
+
+// CHECK: vtestps %ymm12, %ymm10
+// CHECK: encoding: [0xc4,0x42,0x7d,0x0e,0xd4]
+ vtestps %ymm12, %ymm10
+
+// CHECK: vtestps (%rax), %xmm12
+// CHECK: encoding: [0xc4,0x62,0x79,0x0e,0x20]
+ vtestps (%rax), %xmm12
+
+// CHECK: vtestps (%rax), %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7d,0x0e,0x20]
+ vtestps (%rax), %ymm12
+
+// CHECK: vextractps $10, %xmm8, %r8
+// CHECK: encoding: [0xc4,0x43,0x79,0x17,0xc0,0x0a]
+ vextractps $10, %xmm8, %r8
+
+// CHECK: vextractps $7, %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe3,0x79,0x17,0xe1,0x07]
+ vextractps $7, %xmm4, %rcx
+
+// CHECK: vmovd %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xe1]
+ vmovd %xmm4, %rcx
+
+// CHECK: vmovmskpd %xmm4, %rcx
+// CHECK: encoding: [0xc5,0xf9,0x50,0xcc]
+ vmovmskpd %xmm4, %rcx
+
+// CHECK: vmovmskpd %ymm4, %rcx
+// CHECK: encoding: [0xc5,0xfd,0x50,0xcc]
+ vmovmskpd %ymm4, %rcx
+
+// CHECK: vmovmskps %xmm4, %rcx
+// CHECK: encoding: [0xc5,0xf8,0x50,0xcc]
+ vmovmskps %xmm4, %rcx
+
+// CHECK: vmovmskps %ymm4, %rcx
+// CHECK: encoding: [0xc5,0xfc,0x50,0xcc]
+ vmovmskps %ymm4, %rcx
+
+// CHECK: vpextrb $7, %xmm4, %rcx
+// CHECK: encoding: [0xc4,0xe3,0x79,0x14,0xe1,0x07]
+ vpextrb $7, %xmm4, %rcx
+
+// CHECK: vpinsrw $7, %r8, %xmm15, %xmm8
+// CHECK: encoding: [0xc4,0x41,0x01,0xc4,0xc0,0x07]
+ vpinsrw $7, %r8, %xmm15, %xmm8
+
+// CHECK: vpinsrw $7, %rcx, %xmm4, %xmm6
+// CHECK: encoding: [0xc5,0xd9,0xc4,0xf1,0x07]
+ vpinsrw $7, %rcx, %xmm4, %xmm6
+
+// CHECK: vpmovmskb %xmm4, %rcx
+// CHECK: encoding: [0xc5,0xf9,0xd7,0xcc]
+ vpmovmskb %xmm4, %rcx
+
+// CHECK: vblendvpd %ymm11, 57005(%rax,%riz), %ymm12, %ymm13
+// CHECK: encoding: [0xc4,0x63,0x1d,0x4b,0xac,0x20,0xad,0xde,0x00,0x00,0xb0]
+ vblendvpd %ymm11, 0xdead(%rax,%riz), %ymm12, %ymm13
+
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index 0fce5925cba9..f45b0a23d5e8 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -4,7 +4,7 @@
// CHECK: callw 42
// CHECK: encoding: [0x66,0xe8,A,A]
callw 42
-
+
// rdar://8127102
// CHECK: movq %gs:(%rdi), %rax
// CHECK: encoding: [0x65,0x48,0x8b,0x07]
@@ -114,2889 +114,29 @@ movd %mm1, %rdx
// CHECK: encoding: [0x0f,0x7e,0xca]
movd %mm1, %edx
-// CHECK: vaddss %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x32,0x58,0xd0]
-vaddss %xmm8, %xmm9, %xmm10
-
-// CHECK: vmulss %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x32,0x59,0xd0]
-vmulss %xmm8, %xmm9, %xmm10
-
-// CHECK: vsubss %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x32,0x5c,0xd0]
-vsubss %xmm8, %xmm9, %xmm10
-
-// CHECK: vdivss %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x32,0x5e,0xd0]
-vdivss %xmm8, %xmm9, %xmm10
-
-// CHECK: vaddsd %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x33,0x58,0xd0]
-vaddsd %xmm8, %xmm9, %xmm10
-
-// CHECK: vmulsd %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x33,0x59,0xd0]
-vmulsd %xmm8, %xmm9, %xmm10
-
-// CHECK: vsubsd %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x33,0x5c,0xd0]
-vsubsd %xmm8, %xmm9, %xmm10
-
-// CHECK: vdivsd %xmm8, %xmm9, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x33,0x5e,0xd0]
-vdivsd %xmm8, %xmm9, %xmm10
-
-// CHECK: vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2a,0x58,0x5c,0xd9,0xfc]
-vaddss -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2a,0x5c,0x5c,0xd9,0xfc]
-vsubss -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2a,0x59,0x5c,0xd9,0xfc]
-vmulss -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2a,0x5e,0x5c,0xd9,0xfc]
-vdivss -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2b,0x58,0x5c,0xd9,0xfc]
-vaddsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2b,0x5c,0x5c,0xd9,0xfc]
-vsubsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2b,0x59,0x5c,0xd9,0xfc]
-vmulsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x2b,0x5e,0x5c,0xd9,0xfc]
-vdivsd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vaddps %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x58,0xfa]
-vaddps %xmm10, %xmm11, %xmm15
-
-// CHECK: vsubps %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x5c,0xfa]
-vsubps %xmm10, %xmm11, %xmm15
-
-// CHECK: vmulps %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x59,0xfa]
-vmulps %xmm10, %xmm11, %xmm15
-
-// CHECK: vdivps %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x20,0x5e,0xfa]
-vdivps %xmm10, %xmm11, %xmm15
-
-// CHECK: vaddpd %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x58,0xfa]
-vaddpd %xmm10, %xmm11, %xmm15
-
-// CHECK: vsubpd %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x5c,0xfa]
-vsubpd %xmm10, %xmm11, %xmm15
-
-// CHECK: vmulpd %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x59,0xfa]
-vmulpd %xmm10, %xmm11, %xmm15
-
-// CHECK: vdivpd %xmm10, %xmm11, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x21,0x5e,0xfa]
-vdivpd %xmm10, %xmm11, %xmm15
-
-// CHECK: vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x58,0x5c,0xd9,0xfc]
-vaddps -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x5c,0x5c,0xd9,0xfc]
-vsubps -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x59,0x5c,0xd9,0xfc]
-vmulps -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x28,0x5e,0x5c,0xd9,0xfc]
-vdivps -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x58,0x5c,0xd9,0xfc]
-vaddpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x5c,0x5c,0xd9,0xfc]
-vsubpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x59,0x5c,0xd9,0xfc]
-vmulpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-// CHECK: encoding: [0xc5,0x29,0x5e,0x5c,0xd9,0xfc]
-vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
-
-// CHECK: vmaxss %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0a,0x5f,0xe2]
- vmaxss %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxsd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0b,0x5f,0xe2]
- vmaxsd %xmm10, %xmm14, %xmm12
-
-// CHECK: vminss %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0a,0x5d,0xe2]
- vminss %xmm10, %xmm14, %xmm12
-
-// CHECK: vminsd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x0b,0x5d,0xe2]
- vminsd %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x5f,0x54,0xcb,0xfc]
- vmaxss -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1b,0x5f,0x54,0xcb,0xfc]
- vmaxsd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminss -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x5d,0x54,0xcb,0xfc]
- vminss -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1b,0x5d,0x54,0xcb,0xfc]
- vminsd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmaxps %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x5f,0xe2]
- vmaxps %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxpd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x5f,0xe2]
- vmaxpd %xmm10, %xmm14, %xmm12
-
-// CHECK: vminps %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x5d,0xe2]
- vminps %xmm10, %xmm14, %xmm12
-
-// CHECK: vminpd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x5d,0xe2]
- vminpd %xmm10, %xmm14, %xmm12
-
-// CHECK: vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x5f,0x54,0xcb,0xfc]
- vmaxps -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x5f,0x54,0xcb,0xfc]
- vmaxpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x5d,0x54,0xcb,0xfc]
- vminps -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x5d,0x54,0xcb,0xfc]
- vminpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandps %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x54,0xe2]
- vandps %xmm10, %xmm14, %xmm12
-
-// CHECK: vandpd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x54,0xe2]
- vandpd %xmm10, %xmm14, %xmm12
-
-// CHECK: vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x54,0x54,0xcb,0xfc]
- vandps -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x54,0x54,0xcb,0xfc]
- vandpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vorps %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x56,0xe2]
- vorps %xmm10, %xmm14, %xmm12
-
-// CHECK: vorpd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x56,0xe2]
- vorpd %xmm10, %xmm14, %xmm12
-
-// CHECK: vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x56,0x54,0xcb,0xfc]
- vorps -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x56,0x54,0xcb,0xfc]
- vorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vxorps %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x57,0xe2]
- vxorps %xmm10, %xmm14, %xmm12
-
-// CHECK: vxorpd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x57,0xe2]
- vxorpd %xmm10, %xmm14, %xmm12
-
-// CHECK: vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x57,0x54,0xcb,0xfc]
- vxorps -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x57,0x54,0xcb,0xfc]
- vxorpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandnps %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x08,0x55,0xe2]
- vandnps %xmm10, %xmm14, %xmm12
-
-// CHECK: vandnpd %xmm10, %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x09,0x55,0xe2]
- vandnpd %xmm10, %xmm14, %xmm12
-
-// CHECK: vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x18,0x55,0x54,0xcb,0xfc]
- vandnps -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0x55,0x54,0xcb,0xfc]
- vandnpd -4(%rbx,%rcx,8), %xmm12, %xmm10
-
-// CHECK: vmovss -4(%rbx,%rcx,8), %xmm10
-// CHECK: encoding: [0xc5,0x7a,0x10,0x54,0xcb,0xfc]
- vmovss -4(%rbx,%rcx,8), %xmm10
-
-// CHECK: vmovss %xmm14, %xmm10, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x2a,0x10,0xfe]
- vmovss %xmm14, %xmm10, %xmm15
-
-// CHECK: vmovsd -4(%rbx,%rcx,8), %xmm10
-// CHECK: encoding: [0xc5,0x7b,0x10,0x54,0xcb,0xfc]
- vmovsd -4(%rbx,%rcx,8), %xmm10
-
-// CHECK: vmovsd %xmm14, %xmm10, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x2b,0x10,0xfe]
- vmovsd %xmm14, %xmm10, %xmm15
-
// rdar://7840289
// CHECK: pshufb CPI1_0(%rip), %xmm1
// CHECK: encoding: [0x66,0x0f,0x38,0x00,0x0d,A,A,A,A]
// CHECK: fixup A - offset: 5, value: CPI1_0-4
pshufb CPI1_0(%rip), %xmm1
-// CHECK: vunpckhps %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x15,0xef]
- vunpckhps %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpckhpd %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x15,0xef]
- vunpckhpd %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpcklps %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x14,0xef]
- vunpcklps %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpcklpd %xmm15, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x14,0xef]
- vunpcklpd %xmm15, %xmm12, %xmm13
-
-// CHECK: vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x18,0x15,0x7c,0xcb,0xfc]
- vunpckhps -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x19,0x15,0x7c,0xcb,0xfc]
- vunpckhpd -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x18,0x14,0x7c,0xcb,0xfc]
- vunpcklps -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x19,0x14,0x7c,0xcb,0xfc]
- vunpcklpd -4(%rbx,%rcx,8), %xmm12, %xmm15
-
-// CHECK: vcmpps $0, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x00]
- vcmpps $0, %xmm10, %xmm12, %xmm15
-
-// CHECK: vcmpps $0, (%rax), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x18,0xc2,0x38,0x00]
- vcmpps $0, (%rax), %xmm12, %xmm15
-
-// CHECK: vcmpps $7, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xfa,0x07]
- vcmpps $7, %xmm10, %xmm12, %xmm15
-
-// CHECK: vcmppd $0, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x00]
- vcmppd $0, %xmm10, %xmm12, %xmm15
-
-// CHECK: vcmppd $0, (%rax), %xmm12, %xmm15
-// CHECK: encoding: [0xc5,0x19,0xc2,0x38,0x00]
- vcmppd $0, (%rax), %xmm12, %xmm15
-
-// CHECK: vcmppd $7, %xmm10, %xmm12, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xfa,0x07]
- vcmppd $7, %xmm10, %xmm12, %xmm15
-
-// CHECK: vshufps $8, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc6,0xeb,0x08]
- vshufps $8, %xmm11, %xmm12, %xmm13
-
-// CHECK: vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc6,0x6c,0xcb,0xfc,0x08]
- vshufps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vshufpd $8, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc6,0xeb,0x08]
- vshufpd $8, %xmm11, %xmm12, %xmm13
-
-// CHECK: vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc6,0x6c,0xcb,0xfc,0x08]
- vshufpd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x00]
- vcmpeqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x02]
- vcmpleps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x01]
- vcmpltps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x04]
- vcmpneqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x06]
- vcmpnleps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x05]
- vcmpnltps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x07]
- vcmpordps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x03]
- vcmpunordps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x00]
- vcmpeqps -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x02]
- vcmpleps -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x01]
- vcmpltps -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x04]
- vcmpneqps -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x06]
- vcmpnleps -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x05]
- vcmpnltps -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordps -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x03]
- vcmpunordps -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x00]
- vcmpeqpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x02]
- vcmplepd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x01]
- vcmpltpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x04]
- vcmpneqpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x06]
- vcmpnlepd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x05]
- vcmpnltpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x07]
- vcmpordpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x03]
- vcmpunordpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x00]
- vcmpeqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x02]
- vcmplepd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x01]
- vcmpltpd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x04]
- vcmpneqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x06]
- vcmpnlepd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x05]
- vcmpnltpd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordpd -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x03]
- vcmpunordpd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x00]
- vcmpeqss %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x02]
- vcmpless %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x01]
- vcmpltss %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x04]
- vcmpneqss %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x06]
- vcmpnless %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x05]
- vcmpnltss %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x07]
- vcmpordss %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x03]
- vcmpunordss %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpss $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x00]
- vcmpeqss -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x02]
- vcmpless -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x01]
- vcmpltss -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x04]
- vcmpneqss -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x06]
- vcmpnless -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x05]
- vcmpnltss -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpss $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordss -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmpss $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x03]
- vcmpunordss -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd $0, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x00]
- vcmpeqsd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $2, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x02]
- vcmplesd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $1, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x01]
- vcmpltsd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $4, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x04]
- vcmpneqsd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $6, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x06]
- vcmpnlesd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $5, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x05]
- vcmpnltsd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x07]
- vcmpordsd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $3, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x03]
- vcmpunordsd %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpsd $0, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x00]
- vcmpeqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd $2, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x02]
- vcmplesd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd $1, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x01]
- vcmpltsd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd $4, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x04]
- vcmpneqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd $6, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x06]
- vcmpnlesd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd $5, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x05]
- vcmpnltsd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vcmpsd $7, -4(%rbx,%rcx,8), %xmm6, %xmm2
-// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x07]
- vcmpordsd -4(%rbx,%rcx,8), %xmm6, %xmm2
-
-// CHECK: vcmpsd $3, -4(%rbx,%rcx,8), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
- vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13
-
-// CHECK: vucomiss %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
- vucomiss %xmm11, %xmm12
-
-// CHECK: vucomiss (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x2e,0x20]
- vucomiss (%rax), %xmm12
-
-// CHECK: vcomiss %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x2f,0xe3]
- vcomiss %xmm11, %xmm12
-
-// CHECK: vcomiss (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x2f,0x20]
- vcomiss (%rax), %xmm12
-
-// CHECK: vucomisd %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x2e,0xe3]
- vucomisd %xmm11, %xmm12
-
-// CHECK: vucomisd (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x2e,0x20]
- vucomisd (%rax), %xmm12
-
-// CHECK: vcomisd %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x2f,0xe3]
- vcomisd %xmm11, %xmm12
-
-// CHECK: vcomisd (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x2f,0x20]
- vcomisd (%rax), %xmm12
-
-// CHECK: vcvttss2si (%rcx), %eax
-// CHECK: encoding: [0xc5,0xfa,0x2c,0x01]
- vcvttss2si (%rcx), %eax
-
-// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
- vcvtsi2ss (%rax), %xmm11, %xmm12
-
-// CHECK: vcvtsi2ss (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x22,0x2a,0x20]
- vcvtsi2ss (%rax), %xmm11, %xmm12
-
-// CHECK: vcvttsd2si (%rcx), %eax
-// CHECK: encoding: [0xc5,0xfb,0x2c,0x01]
- vcvttsd2si (%rcx), %eax
-
-// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
- vcvtsi2sd (%rax), %xmm11, %xmm12
-
-// CHECK: vcvtsi2sd (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x23,0x2a,0x20]
- vcvtsi2sd (%rax), %xmm11, %xmm12
-
-// CHECK: vmovaps (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x28,0x20]
- vmovaps (%rax), %xmm12
-
-// CHECK: vmovaps %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x28,0xe3]
- vmovaps %xmm11, %xmm12
-
-// CHECK: vmovaps %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x29,0x18]
- vmovaps %xmm11, (%rax)
-
-// CHECK: vmovapd (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x28,0x20]
- vmovapd (%rax), %xmm12
-
-// CHECK: vmovapd %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x28,0xe3]
- vmovapd %xmm11, %xmm12
-
-// CHECK: vmovapd %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x29,0x18]
- vmovapd %xmm11, (%rax)
-
-// CHECK: vmovups (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x10,0x20]
- vmovups (%rax), %xmm12
-
-// CHECK: vmovups %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x10,0xe3]
- vmovups %xmm11, %xmm12
-
-// CHECK: vmovups %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x11,0x18]
- vmovups %xmm11, (%rax)
-
-// CHECK: vmovupd (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x10,0x20]
- vmovupd (%rax), %xmm12
-
-// CHECK: vmovupd %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x10,0xe3]
- vmovupd %xmm11, %xmm12
-
-// CHECK: vmovupd %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x11,0x18]
- vmovupd %xmm11, (%rax)
-
-// CHECK: vmovlps %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x13,0x18]
- vmovlps %xmm11, (%rax)
-
-// CHECK: vmovlps (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0x12,0x28]
- vmovlps (%rax), %xmm12, %xmm13
-
-// CHECK: vmovlpd %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x13,0x18]
- vmovlpd %xmm11, (%rax)
-
-// CHECK: vmovlpd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x12,0x28]
- vmovlpd (%rax), %xmm12, %xmm13
-
-// CHECK: vmovhps %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x17,0x18]
- vmovhps %xmm11, (%rax)
-
-// CHECK: vmovhps (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x18,0x16,0x28]
- vmovhps (%rax), %xmm12, %xmm13
-
-// CHECK: vmovhpd %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x17,0x18]
- vmovhpd %xmm11, (%rax)
-
-// CHECK: vmovhpd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x16,0x28]
- vmovhpd (%rax), %xmm12, %xmm13
-
-// CHECK: vmovlhps %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x16,0xeb]
- vmovlhps %xmm11, %xmm12, %xmm13
-
-// CHECK: vmovhlps %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0x12,0xeb]
- vmovhlps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcvtss2sil %xmm11, %eax
-// CHECK: encoding: [0xc4,0xc1,0x7a,0x2d,0xc3]
- vcvtss2si %xmm11, %eax
-
-// CHECK: vcvtss2sil (%rax), %ebx
-// CHECK: encoding: [0xc5,0xfa,0x2d,0x18]
- vcvtss2si (%rax), %ebx
-
-// CHECK: vcvtdq2ps %xmm10, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xe2]
- vcvtdq2ps %xmm10, %xmm12
-
-// CHECK: vcvtdq2ps (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x5b,0x20]
- vcvtdq2ps (%rax), %xmm12
-
-// CHECK: vcvtsd2ss %xmm12, %xmm13, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x13,0x5a,0xd4]
- vcvtsd2ss %xmm12, %xmm13, %xmm10
-
-// CHECK: vcvtsd2ss (%rax), %xmm13, %xmm10
-// CHECK: encoding: [0xc5,0x13,0x5a,0x10]
- vcvtsd2ss (%rax), %xmm13, %xmm10
-
-// CHECK: vcvtps2dq %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x79,0x5b,0xdc]
- vcvtps2dq %xmm12, %xmm11
-
-// CHECK: vcvtps2dq (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x5b,0x18]
- vcvtps2dq (%rax), %xmm11
-
-// CHECK: vcvtss2sd %xmm12, %xmm13, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x12,0x5a,0xd4]
- vcvtss2sd %xmm12, %xmm13, %xmm10
-
-// CHECK: vcvtss2sd (%rax), %xmm13, %xmm10
-// CHECK: encoding: [0xc5,0x12,0x5a,0x10]
- vcvtss2sd (%rax), %xmm13, %xmm10
-
-// CHECK: vcvtdq2ps %xmm13, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x78,0x5b,0xd5]
- vcvtdq2ps %xmm13, %xmm10
-
-// CHECK: vcvtdq2ps (%ecx), %xmm13
-// CHECK: encoding: [0xc5,0x78,0x5b,0x29]
- vcvtdq2ps (%ecx), %xmm13
-
-// CHECK: vcvttps2dq %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7a,0x5b,0xdc]
- vcvttps2dq %xmm12, %xmm11
-
-// CHECK: vcvttps2dq (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7a,0x5b,0x18]
- vcvttps2dq (%rax), %xmm11
-
-// CHECK: vcvtps2pd %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x78,0x5a,0xdc]
- vcvtps2pd %xmm12, %xmm11
-
-// CHECK: vcvtps2pd (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x78,0x5a,0x18]
- vcvtps2pd (%rax), %xmm11
-
-// CHECK: vcvtpd2ps %xmm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xdc]
- vcvtpd2ps %xmm12, %xmm11
-
-// CHECK: vsqrtpd %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x79,0x51,0xe3]
- vsqrtpd %xmm11, %xmm12
-
-// CHECK: vsqrtpd (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x79,0x51,0x20]
- vsqrtpd (%rax), %xmm12
-
-// CHECK: vsqrtps %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x51,0xe3]
- vsqrtps %xmm11, %xmm12
-
-// CHECK: vsqrtps (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x51,0x20]
- vsqrtps (%rax), %xmm12
-
-// CHECK: vsqrtsd %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1b,0x51,0xd3]
- vsqrtsd %xmm11, %xmm12, %xmm10
-
-// CHECK: vsqrtsd (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1b,0x51,0x10]
- vsqrtsd (%rax), %xmm12, %xmm10
-
-// CHECK: vsqrtss %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1a,0x51,0xd3]
- vsqrtss %xmm11, %xmm12, %xmm10
-
-// CHECK: vsqrtss (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x51,0x10]
- vsqrtss (%rax), %xmm12, %xmm10
-
-// CHECK: vrsqrtps %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x52,0xe3]
- vrsqrtps %xmm11, %xmm12
-
-// CHECK: vrsqrtps (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x52,0x20]
- vrsqrtps (%rax), %xmm12
-
-// CHECK: vrsqrtss %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1a,0x52,0xd3]
- vrsqrtss %xmm11, %xmm12, %xmm10
-
-// CHECK: vrsqrtss (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x52,0x10]
- vrsqrtss (%rax), %xmm12, %xmm10
-
-// CHECK: vrcpps %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x78,0x53,0xe3]
- vrcpps %xmm11, %xmm12
-
-// CHECK: vrcpps (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x78,0x53,0x20]
- vrcpps (%rax), %xmm12
-
-// CHECK: vrcpss %xmm11, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x1a,0x53,0xd3]
- vrcpss %xmm11, %xmm12, %xmm10
-
-// CHECK: vrcpss (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x1a,0x53,0x10]
- vrcpss (%rax), %xmm12, %xmm10
-
-// CHECK: vmovntdq %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0xe7,0x18]
- vmovntdq %xmm11, (%rax)
-
-// CHECK: vmovntpd %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x2b,0x18]
- vmovntpd %xmm11, (%rax)
-
-// CHECK: vmovntps %xmm11, (%rax)
-// CHECK: encoding: [0xc5,0x78,0x2b,0x18]
- vmovntps %xmm11, (%rax)
-
-// CHECK: vldmxcsr -4(%rip)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x15,0xfc,0xff,0xff,0xff]
- vldmxcsr -4(%rip)
-
-// CHECK: vstmxcsr -4(%rsp)
-// CHECK: encoding: [0xc5,0xf8,0xae,0x5c,0x24,0xfc]
- vstmxcsr -4(%rsp)
-
-// CHECK: vpsubb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf8,0xeb]
- vpsubb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf8,0x28]
- vpsubb (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf9,0xeb]
- vpsubw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf9,0x28]
- vpsubw (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfa,0xeb]
- vpsubd %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfa,0x28]
- vpsubd (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfb,0xeb]
- vpsubq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfb,0x28]
- vpsubq (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubsb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe8,0xeb]
- vpsubsb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubsb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe8,0x28]
- vpsubsb (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe9,0xeb]
- vpsubsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe9,0x28]
- vpsubsw (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubusb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd8,0xeb]
- vpsubusb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubusb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd8,0x28]
- vpsubusb (%rax), %xmm12, %xmm13
-
-// CHECK: vpsubusw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd9,0xeb]
- vpsubusw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsubusw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd9,0x28]
- vpsubusw (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfc,0xeb]
- vpaddb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfc,0x28]
- vpaddb (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfd,0xeb]
- vpaddw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfd,0x28]
- vpaddw (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xfe,0xeb]
- vpaddd %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xfe,0x28]
- vpaddd (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd4,0xeb]
- vpaddq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd4,0x28]
- vpaddq (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddsb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xec,0xeb]
- vpaddsb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddsb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xec,0x28]
- vpaddsb (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xed,0xeb]
- vpaddsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xed,0x28]
- vpaddsw (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddusb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdc,0xeb]
- vpaddusb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddusb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdc,0x28]
- vpaddusb (%rax), %xmm12, %xmm13
-
-// CHECK: vpaddusw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdd,0xeb]
- vpaddusw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpaddusw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdd,0x28]
- vpaddusw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulhuw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe4,0xeb]
- vpmulhuw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmulhuw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe4,0x28]
- vpmulhuw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulhw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe5,0xeb]
- vpmulhw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmulhw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe5,0x28]
- vpmulhw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmullw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd5,0xeb]
- vpmullw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmullw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd5,0x28]
- vpmullw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmuludq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf4,0xeb]
- vpmuludq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmuludq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf4,0x28]
- vpmuludq (%rax), %xmm12, %xmm13
-
-// CHECK: vpavgb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe0,0xeb]
- vpavgb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpavgb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe0,0x28]
- vpavgb (%rax), %xmm12, %xmm13
-
-// CHECK: vpavgw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe3,0xeb]
- vpavgw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpavgw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe3,0x28]
- vpavgw (%rax), %xmm12, %xmm13
-
-// CHECK: vpminsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xea,0xeb]
- vpminsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpminsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xea,0x28]
- vpminsw (%rax), %xmm12, %xmm13
-
-// CHECK: vpminub %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xda,0xeb]
- vpminub %xmm11, %xmm12, %xmm13
-
-// CHECK: vpminub (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xda,0x28]
- vpminub (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xee,0xeb]
- vpmaxsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmaxsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xee,0x28]
- vpmaxsw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxub %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xde,0xeb]
- vpmaxub %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmaxub (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xde,0x28]
- vpmaxub (%rax), %xmm12, %xmm13
-
-// CHECK: vpsadbw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf6,0xeb]
- vpsadbw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsadbw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf6,0x28]
- vpsadbw (%rax), %xmm12, %xmm13
-
-// CHECK: vpsllw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf1,0xeb]
- vpsllw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsllw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf1,0x28]
- vpsllw (%rax), %xmm12, %xmm13
-
-// CHECK: vpslld %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf2,0xeb]
- vpslld %xmm11, %xmm12, %xmm13
-
-// CHECK: vpslld (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf2,0x28]
- vpslld (%rax), %xmm12, %xmm13
-
-// CHECK: vpsllq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xf3,0xeb]
- vpsllq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsllq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xf3,0x28]
- vpsllq (%rax), %xmm12, %xmm13
-
-// CHECK: vpsraw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe1,0xeb]
- vpsraw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsraw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe1,0x28]
- vpsraw (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrad %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xe2,0xeb]
- vpsrad %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrad (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xe2,0x28]
- vpsrad (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrlw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd1,0xeb]
- vpsrlw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrlw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd1,0x28]
- vpsrlw (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrld %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd2,0xeb]
- vpsrld %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrld (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd2,0x28]
- vpsrld (%rax), %xmm12, %xmm13
-
-// CHECK: vpsrlq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd3,0xeb]
- vpsrlq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsrlq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xd3,0x28]
- vpsrlq (%rax), %xmm12, %xmm13
-
-// CHECK: vpslld $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
- vpslld $10, %xmm12, %xmm13
-
-// CHECK: vpslldq $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xfc,0x0a]
- vpslldq $10, %xmm12, %xmm13
-
-// CHECK: vpsllq $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xf4,0x0a]
- vpsllq $10, %xmm12, %xmm13
-
-// CHECK: vpsllw $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xf4,0x0a]
- vpsllw $10, %xmm12, %xmm13
-
-// CHECK: vpsrad $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xe4,0x0a]
- vpsrad $10, %xmm12, %xmm13
-
-// CHECK: vpsraw $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xe4,0x0a]
- vpsraw $10, %xmm12, %xmm13
-
-// CHECK: vpsrld $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xd4,0x0a]
- vpsrld $10, %xmm12, %xmm13
-
-// CHECK: vpsrldq $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xdc,0x0a]
- vpsrldq $10, %xmm12, %xmm13
-
-// CHECK: vpsrlq $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x73,0xd4,0x0a]
- vpsrlq $10, %xmm12, %xmm13
-
-// CHECK: vpsrlw $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x71,0xd4,0x0a]
- vpsrlw $10, %xmm12, %xmm13
-
-// CHECK: vpslld $10, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0xc1,0x11,0x72,0xf4,0x0a]
- vpslld $10, %xmm12, %xmm13
-
-// CHECK: vpand %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdb,0xeb]
- vpand %xmm11, %xmm12, %xmm13
-
-// CHECK: vpand (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdb,0x28]
- vpand (%rax), %xmm12, %xmm13
-
-// CHECK: vpor %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xeb,0xeb]
- vpor %xmm11, %xmm12, %xmm13
-
-// CHECK: vpor (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xeb,0x28]
- vpor (%rax), %xmm12, %xmm13
-
-// CHECK: vpxor %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xef,0xeb]
- vpxor %xmm11, %xmm12, %xmm13
-
-// CHECK: vpxor (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xef,0x28]
- vpxor (%rax), %xmm12, %xmm13
-
-// CHECK: vpandn %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xdf,0xeb]
- vpandn %xmm11, %xmm12, %xmm13
-
-// CHECK: vpandn (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xdf,0x28]
- vpandn (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x74,0xeb]
- vpcmpeqb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpeqb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x74,0x28]
- vpcmpeqb (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x75,0xeb]
- vpcmpeqw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpeqw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x75,0x28]
- vpcmpeqw (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x76,0xeb]
- vpcmpeqd %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpeqd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x76,0x28]
- vpcmpeqd (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpgtb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x64,0xeb]
- vpcmpgtb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpgtb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x64,0x28]
- vpcmpgtb (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpgtw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x65,0xeb]
- vpcmpgtw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpgtw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x65,0x28]
- vpcmpgtw (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpgtd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x66,0xeb]
- vpcmpgtd %xmm11, %xmm12, %xmm13
-
-// CHECK: vpcmpgtd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x66,0x28]
- vpcmpgtd (%rax), %xmm12, %xmm13
-
-// CHECK: vpacksswb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x63,0xeb]
- vpacksswb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpacksswb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x63,0x28]
- vpacksswb (%rax), %xmm12, %xmm13
-
-// CHECK: vpackssdw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6b,0xeb]
- vpackssdw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpackssdw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6b,0x28]
- vpackssdw (%rax), %xmm12, %xmm13
-
-// CHECK: vpackuswb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x67,0xeb]
- vpackuswb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpackuswb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x67,0x28]
- vpackuswb (%rax), %xmm12, %xmm13
-
-// CHECK: vpshufd $4, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x79,0x70,0xec,0x04]
- vpshufd $4, %xmm12, %xmm13
-
-// CHECK: vpshufd $4, (%rax), %xmm13
-// CHECK: encoding: [0xc5,0x79,0x70,0x28,0x04]
- vpshufd $4, (%rax), %xmm13
-
-// CHECK: vpshufhw $4, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x7a,0x70,0xec,0x04]
- vpshufhw $4, %xmm12, %xmm13
-
-// CHECK: vpshufhw $4, (%rax), %xmm13
-// CHECK: encoding: [0xc5,0x7a,0x70,0x28,0x04]
- vpshufhw $4, (%rax), %xmm13
-
-// CHECK: vpshuflw $4, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x7b,0x70,0xec,0x04]
- vpshuflw $4, %xmm12, %xmm13
-
-// CHECK: vpshuflw $4, (%rax), %xmm13
-// CHECK: encoding: [0xc5,0x7b,0x70,0x28,0x04]
- vpshuflw $4, (%rax), %xmm13
-
-// CHECK: vpunpcklbw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x60,0xeb]
- vpunpcklbw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpcklbw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x60,0x28]
- vpunpcklbw (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpcklwd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x61,0xeb]
- vpunpcklwd %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpcklwd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x61,0x28]
- vpunpcklwd (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckldq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x62,0xeb]
- vpunpckldq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckldq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x62,0x28]
- vpunpckldq (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpcklqdq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6c,0xeb]
- vpunpcklqdq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpcklqdq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6c,0x28]
- vpunpcklqdq (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhbw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x68,0xeb]
- vpunpckhbw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhbw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x68,0x28]
- vpunpckhbw (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhwd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x69,0xeb]
- vpunpckhwd %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhwd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x69,0x28]
- vpunpckhwd (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhdq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6a,0xeb]
- vpunpckhdq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhdq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6a,0x28]
- vpunpckhdq (%rax), %xmm12, %xmm13
-
-// CHECK: vpunpckhqdq %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x6d,0xeb]
- vpunpckhqdq %xmm11, %xmm12, %xmm13
-
-// CHECK: vpunpckhqdq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x6d,0x28]
- vpunpckhqdq (%rax), %xmm12, %xmm13
-
-// CHECK: vpinsrw $7, %eax, %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc4,0xe8,0x07]
- vpinsrw $7, %eax, %xmm12, %xmm13
-
-// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0xc4,0x28,0x07]
- vpinsrw $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vpextrw $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
- vpextrw $7, %xmm12, %eax
-
-// CHECK: vpmovmskb %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0xd7,0xc4]
- vpmovmskb %xmm12, %eax
-
-// CHECK: vmaskmovdqu %xmm14, %xmm15
-// CHECK: encoding: [0xc4,0x41,0x79,0xf7,0xfe]
- vmaskmovdqu %xmm14, %xmm15
-
-// CHECK: vmovd %eax, %xmm14
-// CHECK: encoding: [0xc5,0x79,0x6e,0xf0]
- vmovd %eax, %xmm14
-
-// CHECK: vmovd (%rax), %xmm14
-// CHECK: encoding: [0xc5,0x79,0x6e,0x30]
- vmovd (%rax), %xmm14
-
-// CHECK: vmovd %xmm14, (%rax)
-// CHECK: encoding: [0xc5,0x79,0x7e,0x30]
- vmovd %xmm14, (%rax)
-
-// CHECK: vmovd %rax, %xmm14
-// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
- vmovd %rax, %xmm14
-
-// CHECK: vmovq %xmm14, (%rax)
-// CHECK: encoding: [0xc5,0x79,0xd6,0x30]
- vmovq %xmm14, (%rax)
-
-// CHECK: vmovq %xmm14, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0x7e,0xe6]
- vmovq %xmm14, %xmm12
-
-// CHECK: vmovq (%rax), %xmm14
-// CHECK: encoding: [0xc5,0x7a,0x7e,0x30]
- vmovq (%rax), %xmm14
-
-// CHECK: vmovq %rax, %xmm14
-// CHECK: encoding: [0xc4,0x61,0xf9,0x6e,0xf0]
- vmovq %rax, %xmm14
-
-// CHECK: vmovq %xmm14, %rax
-// CHECK: encoding: [0xc4,0x61,0xf9,0x7e,0xf0]
- vmovq %xmm14, %rax
-
-// CHECK: vcvtpd2dq %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xe3]
- vcvtpd2dq %xmm11, %xmm12
-
-// CHECK: vcvtdq2pd %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0xe6,0xe3]
- vcvtdq2pd %xmm11, %xmm12
-
-// CHECK: vcvtdq2pd (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7a,0xe6,0x20]
- vcvtdq2pd (%rax), %xmm12
-
-// CHECK: vmovshdup %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0x16,0xe3]
- vmovshdup %xmm11, %xmm12
-
-// CHECK: vmovshdup (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7a,0x16,0x20]
- vmovshdup (%rax), %xmm12
-
-// CHECK: vmovsldup %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7a,0x12,0xe3]
- vmovsldup %xmm11, %xmm12
-
-// CHECK: vmovsldup (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7a,0x12,0x20]
- vmovsldup (%rax), %xmm12
-
-// CHECK: vmovddup %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x41,0x7b,0x12,0xe3]
- vmovddup %xmm11, %xmm12
-
-// CHECK: vmovddup (%rax), %xmm12
-// CHECK: encoding: [0xc5,0x7b,0x12,0x20]
- vmovddup (%rax), %xmm12
-
-// CHECK: vaddsubps %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0xd0,0xeb]
- vaddsubps %xmm11, %xmm12, %xmm13
-
-// CHECK: vaddsubps (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x23,0xd0,0x20]
- vaddsubps (%rax), %xmm11, %xmm12
-
-// CHECK: vaddsubpd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0xd0,0xeb]
- vaddsubpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vaddsubpd (%rax), %xmm11, %xmm12
-// CHECK: encoding: [0xc5,0x21,0xd0,0x20]
- vaddsubpd (%rax), %xmm11, %xmm12
-
-// CHECK: vhaddps %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0x7c,0xeb]
- vhaddps %xmm11, %xmm12, %xmm13
-
-// CHECK: vhaddps (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0x7c,0x28]
- vhaddps (%rax), %xmm12, %xmm13
-
-// CHECK: vhaddpd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x7c,0xeb]
- vhaddpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vhaddpd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x7c,0x28]
- vhaddpd (%rax), %xmm12, %xmm13
-
-// CHECK: vhsubps %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x1b,0x7d,0xeb]
- vhsubps %xmm11, %xmm12, %xmm13
-
-// CHECK: vhsubps (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x1b,0x7d,0x28]
- vhsubps (%rax), %xmm12, %xmm13
-
-// CHECK: vhsubpd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x19,0x7d,0xeb]
- vhsubpd %xmm11, %xmm12, %xmm13
-
-// CHECK: vhsubpd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc5,0x19,0x7d,0x28]
- vhsubpd (%rax), %xmm12, %xmm13
-
-// CHECK: vpabsb %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x42,0x79,0x1c,0xe3]
- vpabsb %xmm11, %xmm12
-
-// CHECK: vpabsb (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x1c,0x20]
- vpabsb (%rax), %xmm12
-
-// CHECK: vpabsw %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x42,0x79,0x1d,0xe3]
- vpabsw %xmm11, %xmm12
-
-// CHECK: vpabsw (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x1d,0x20]
- vpabsw (%rax), %xmm12
-
-// CHECK: vpabsd %xmm11, %xmm12
-// CHECK: encoding: [0xc4,0x42,0x79,0x1e,0xe3]
- vpabsd %xmm11, %xmm12
-
-// CHECK: vpabsd (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x1e,0x20]
- vpabsd (%rax), %xmm12
-
-// CHECK: vphaddw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x01,0xeb]
- vphaddw %xmm11, %xmm12, %xmm13
-
-// CHECK: vphaddw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x01,0x28]
- vphaddw (%rax), %xmm12, %xmm13
-
-// CHECK: vphaddd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x02,0xeb]
- vphaddd %xmm11, %xmm12, %xmm13
-
-// CHECK: vphaddd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x02,0x28]
- vphaddd (%rax), %xmm12, %xmm13
-
-// CHECK: vphaddsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x03,0xeb]
- vphaddsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vphaddsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x03,0x28]
- vphaddsw (%rax), %xmm12, %xmm13
-
-// CHECK: vphsubw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x05,0xeb]
- vphsubw %xmm11, %xmm12, %xmm13
-
-// CHECK: vphsubw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x05,0x28]
- vphsubw (%rax), %xmm12, %xmm13
-
-// CHECK: vphsubd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x06,0xeb]
- vphsubd %xmm11, %xmm12, %xmm13
-
-// CHECK: vphsubd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x06,0x28]
- vphsubd (%rax), %xmm12, %xmm13
-
-// CHECK: vphsubsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x07,0xeb]
- vphsubsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vphsubsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x07,0x28]
- vphsubsw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaddubsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x04,0xeb]
- vpmaddubsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmaddubsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x04,0x28]
- vpmaddubsw (%rax), %xmm12, %xmm13
-
-// CHECK: vpshufb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x00,0xeb]
- vpshufb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpshufb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x00,0x28]
- vpshufb (%rax), %xmm12, %xmm13
-
-// CHECK: vpsignb %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x08,0xeb]
- vpsignb %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsignb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x08,0x28]
- vpsignb (%rax), %xmm12, %xmm13
-
-// CHECK: vpsignw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x09,0xeb]
- vpsignw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsignw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x09,0x28]
- vpsignw (%rax), %xmm12, %xmm13
-
-// CHECK: vpsignd %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x0a,0xeb]
- vpsignd %xmm11, %xmm12, %xmm13
-
-// CHECK: vpsignd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x0a,0x28]
- vpsignd (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulhrsw %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x19,0x0b,0xeb]
- vpmulhrsw %xmm11, %xmm12, %xmm13
-
-// CHECK: vpmulhrsw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x0b,0x28]
- vpmulhrsw (%rax), %xmm12, %xmm13
-
-// CHECK: vpalignr $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x19,0x0f,0xeb,0x07]
- vpalignr $7, %xmm11, %xmm12, %xmm13
-
-// CHECK: vpalignr $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x19,0x0f,0x28,0x07]
- vpalignr $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vroundsd $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x19,0x0b,0xeb,0x07]
- vroundsd $7, %xmm11, %xmm12, %xmm13
-
-// CHECK: vroundsd $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x19,0x0b,0x28,0x07]
- vroundsd $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vroundss $7, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x19,0x0a,0xeb,0x07]
- vroundss $7, %xmm11, %xmm12, %xmm13
-
-// CHECK: vroundss $7, (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x19,0x0a,0x28,0x07]
- vroundss $7, (%rax), %xmm12, %xmm13
-
-// CHECK: vroundpd $7, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x79,0x09,0xec,0x07]
- vroundpd $7, %xmm12, %xmm13
-
-// CHECK: vroundpd $7, (%rax), %xmm13
-// CHECK: encoding: [0xc4,0x63,0x79,0x09,0x28,0x07]
- vroundpd $7, (%rax), %xmm13
-
-// CHECK: vroundps $7, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x43,0x79,0x08,0xec,0x07]
- vroundps $7, %xmm12, %xmm13
-
-// CHECK: vroundps $7, (%rax), %xmm13
-// CHECK: encoding: [0xc4,0x63,0x79,0x08,0x28,0x07]
- vroundps $7, (%rax), %xmm13
-
-// CHECK: vphminposuw %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x42,0x79,0x41,0xec]
- vphminposuw %xmm12, %xmm13
-
-// CHECK: vphminposuw (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20]
- vphminposuw (%rax), %xmm12
-
-// CHECK: vpackusdw %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc]
- vpackusdw %xmm12, %xmm13, %xmm11
-
-// CHECK: vpackusdw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28]
- vpackusdw (%rax), %xmm12, %xmm13
-
-// CHECK: vpcmpeqq %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc]
- vpcmpeqq %xmm12, %xmm13, %xmm11
-
-// CHECK: vpcmpeqq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28]
- vpcmpeqq (%rax), %xmm12, %xmm13
-
-// CHECK: vpminsb %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc]
- vpminsb %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminsb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28]
- vpminsb (%rax), %xmm12, %xmm13
-
-// CHECK: vpminsd %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc]
- vpminsd %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminsd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28]
- vpminsd (%rax), %xmm12, %xmm13
-
-// CHECK: vpminud %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc]
- vpminud %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminud (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28]
- vpminud (%rax), %xmm12, %xmm13
-
-// CHECK: vpminuw %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc]
- vpminuw %xmm12, %xmm13, %xmm11
-
-// CHECK: vpminuw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28]
- vpminuw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxsb %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc]
- vpmaxsb %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxsb (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28]
- vpmaxsb (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxsd %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc]
- vpmaxsd %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxsd (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28]
- vpmaxsd (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxud %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc]
- vpmaxud %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxud (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28]
- vpmaxud (%rax), %xmm12, %xmm13
-
-// CHECK: vpmaxuw %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc]
- vpmaxuw %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmaxuw (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28]
- vpmaxuw (%rax), %xmm12, %xmm13
-
-// CHECK: vpmuldq %xmm12, %xmm13, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc]
- vpmuldq %xmm12, %xmm13, %xmm11
-
-// CHECK: vpmuldq (%rax), %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28]
- vpmuldq (%rax), %xmm12, %xmm13
-
-// CHECK: vpmulld %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x51,0x40,0xdc]
- vpmulld %xmm12, %xmm5, %xmm11
-
-// CHECK: vpmulld (%rax), %xmm5, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x51,0x40,0x28]
- vpmulld (%rax), %xmm5, %xmm13
-
-// CHECK: vblendps $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x0c,0xdc,0x03]
- vblendps $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vblendps $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x0c,0x18,0x03]
- vblendps $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vblendpd $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x0d,0xdc,0x03]
- vblendpd $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vblendpd $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x0d,0x18,0x03]
- vblendpd $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vpblendw $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x0e,0xdc,0x03]
- vpblendw $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vpblendw $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x0e,0x18,0x03]
- vpblendw $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vmpsadbw $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x42,0xdc,0x03]
- vmpsadbw $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vmpsadbw $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x42,0x18,0x03]
- vmpsadbw $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vdpps $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x40,0xdc,0x03]
- vdpps $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vdpps $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x40,0x18,0x03]
- vdpps $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vdppd $3, %xmm12, %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x51,0x41,0xdc,0x03]
- vdppd $3, %xmm12, %xmm5, %xmm11
-
-// CHECK: vdppd $3, (%rax), %xmm5, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x51,0x41,0x18,0x03]
- vdppd $3, (%rax), %xmm5, %xmm11
-
-// CHECK: vblendvpd %xmm12, %xmm5, %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0xed,0xc0]
- vblendvpd %xmm12, %xmm5, %xmm11, %xmm13
-
-// CHECK: vblendvpd %xmm12, (%rax), %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4b,0x28,0xc0]
- vblendvpd %xmm12, (%rax), %xmm11, %xmm13
-
-// CHECK: vblendvps %xmm12, %xmm5, %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0xed,0xc0]
- vblendvps %xmm12, %xmm5, %xmm11, %xmm13
-
-// CHECK: vblendvps %xmm12, (%rax), %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4a,0x28,0xc0]
- vblendvps %xmm12, (%rax), %xmm11, %xmm13
-
-// CHECK: vpblendvb %xmm12, %xmm5, %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0xed,0xc0]
- vpblendvb %xmm12, %xmm5, %xmm11, %xmm13
-
-// CHECK: vpblendvb %xmm12, (%rax), %xmm11, %xmm13
-// CHECK: encoding: [0xc4,0x63,0x21,0x4c,0x28,0xc0]
- vpblendvb %xmm12, (%rax), %xmm11, %xmm13
-
-// CHECK: vpmovsxbw %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x20,0xd4]
- vpmovsxbw %xmm12, %xmm10
-
-// CHECK: vpmovsxbw (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x20,0x20]
- vpmovsxbw (%rax), %xmm12
-
-// CHECK: vpmovsxwd %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x23,0xd4]
- vpmovsxwd %xmm12, %xmm10
-
-// CHECK: vpmovsxwd (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x23,0x20]
- vpmovsxwd (%rax), %xmm12
-
-// CHECK: vpmovsxdq %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x25,0xd4]
- vpmovsxdq %xmm12, %xmm10
-
-// CHECK: vpmovsxdq (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x25,0x20]
- vpmovsxdq (%rax), %xmm12
-
-// CHECK: vpmovzxbw %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x30,0xd4]
- vpmovzxbw %xmm12, %xmm10
-
-// CHECK: vpmovzxbw (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x30,0x20]
- vpmovzxbw (%rax), %xmm12
-
-// CHECK: vpmovzxwd %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x33,0xd4]
- vpmovzxwd %xmm12, %xmm10
-
-// CHECK: vpmovzxwd (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x33,0x20]
- vpmovzxwd (%rax), %xmm12
-
-// CHECK: vpmovzxdq %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x35,0xd4]
- vpmovzxdq %xmm12, %xmm10
-
-// CHECK: vpmovzxdq (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x35,0x20]
- vpmovzxdq (%rax), %xmm12
-
-// CHECK: vpmovsxbq %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x22,0xd4]
- vpmovsxbq %xmm12, %xmm10
-
-// CHECK: vpmovsxbq (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x22,0x20]
- vpmovsxbq (%rax), %xmm12
-
-// CHECK: vpmovzxbq %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x32,0xd4]
- vpmovzxbq %xmm12, %xmm10
-
-// CHECK: vpmovzxbq (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x32,0x20]
- vpmovzxbq (%rax), %xmm12
-
-// CHECK: vpmovsxbd %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x21,0xd4]
- vpmovsxbd %xmm12, %xmm10
-
-// CHECK: vpmovsxbd (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x21,0x20]
- vpmovsxbd (%rax), %xmm12
-
-// CHECK: vpmovsxwq %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x24,0xd4]
- vpmovsxwq %xmm12, %xmm10
-
-// CHECK: vpmovsxwq (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x24,0x20]
- vpmovsxwq (%rax), %xmm12
-
-// CHECK: vpmovzxbd %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x31,0xd4]
- vpmovzxbd %xmm12, %xmm10
-
-// CHECK: vpmovzxbd (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x31,0x20]
- vpmovzxbd (%rax), %xmm12
-
-// CHECK: vpmovzxwq %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x34,0xd4]
- vpmovzxwq %xmm12, %xmm10
-
-// CHECK: vpmovzxwq (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x34,0x20]
- vpmovzxwq (%rax), %xmm12
-
-// CHECK: vpextrw $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0xc5,0xc4,0x07]
- vpextrw $7, %xmm12, %eax
-
-// CHECK: vpextrw $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x15,0x20,0x07]
- vpextrw $7, %xmm12, (%rax)
-
-// CHECK: vpextrd $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0x63,0x79,0x16,0xe0,0x07]
- vpextrd $7, %xmm12, %eax
-
-// CHECK: vpextrd $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x16,0x20,0x07]
- vpextrd $7, %xmm12, (%rax)
-
-// CHECK: vpextrb $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0x63,0x79,0x14,0xe0,0x07]
- vpextrb $7, %xmm12, %eax
-
-// CHECK: vpextrb $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x14,0x20,0x07]
- vpextrb $7, %xmm12, (%rax)
-
-// CHECK: vpextrq $7, %xmm12, %rcx
-// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0xe1,0x07]
- vpextrq $7, %xmm12, %rcx
-
-// CHECK: vpextrq $7, %xmm12, (%rcx)
-// CHECK: encoding: [0xc4,0x63,0xf9,0x16,0x21,0x07]
- vpextrq $7, %xmm12, (%rcx)
-
-// CHECK: vextractps $7, %xmm12, (%rax)
-// CHECK: encoding: [0xc4,0x63,0x79,0x17,0x20,0x07]
- vextractps $7, %xmm12, (%rax)
-
-// CHECK: vextractps $7, %xmm12, %eax
-// CHECK: encoding: [0xc4,0x63,0x79,0x17,0xe0,0x07]
- vextractps $7, %xmm12, %eax
-
-// CHECK: vpinsrw $7, %eax, %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0xc4,0xd0,0x07]
- vpinsrw $7, %eax, %xmm12, %xmm10
-
-// CHECK: vpinsrw $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc5,0x19,0xc4,0x10,0x07]
- vpinsrw $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vpinsrb $7, %eax, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x20,0xd0,0x07]
- vpinsrb $7, %eax, %xmm12, %xmm10
-
-// CHECK: vpinsrb $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x20,0x10,0x07]
- vpinsrb $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vpinsrd $7, %eax, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x22,0xd0,0x07]
- vpinsrd $7, %eax, %xmm12, %xmm10
-
-// CHECK: vpinsrd $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x19,0x22,0x10,0x07]
- vpinsrd $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vpinsrq $7, %rax, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x99,0x22,0xd0,0x07]
- vpinsrq $7, %rax, %xmm12, %xmm10
-
-// CHECK: vpinsrq $7, (%rax), %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x63,0x99,0x22,0x10,0x07]
- vpinsrq $7, (%rax), %xmm12, %xmm10
-
-// CHECK: vinsertps $7, %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x43,0x29,0x21,0xdc,0x07]
- vinsertps $7, %xmm12, %xmm10, %xmm11
-
-// CHECK: vinsertps $7, (%rax), %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x63,0x29,0x21,0x18,0x07]
- vinsertps $7, (%rax), %xmm10, %xmm11
-
-// CHECK: vptest %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0x17,0xd4]
- vptest %xmm12, %xmm10
-
-// CHECK: vptest (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x17,0x20]
- vptest (%rax), %xmm12
-
-// CHECK: vmovntdqa (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0x2a,0x20]
- vmovntdqa (%rax), %xmm12
-
-// CHECK: vpcmpgtq %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0x37,0xdc]
- vpcmpgtq %xmm12, %xmm10, %xmm11
-
-// CHECK: vpcmpgtq (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0x37,0x28]
- vpcmpgtq (%rax), %xmm10, %xmm13
-
-// CHECK: vpcmpistrm $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x62,0xd4,0x07]
- vpcmpistrm $7, %xmm12, %xmm10
-
-// CHECK: vpcmpistrm $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x62,0x10,0x07]
- vpcmpistrm $7, (%rax), %xmm10
-
-// CHECK: vpcmpestrm $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x60,0xd4,0x07]
- vpcmpestrm $7, %xmm12, %xmm10
-
-// CHECK: vpcmpestrm $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x60,0x10,0x07]
- vpcmpestrm $7, (%rax), %xmm10
-
-// CHECK: vpcmpistri $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x63,0xd4,0x07]
- vpcmpistri $7, %xmm12, %xmm10
-
-// CHECK: vpcmpistri $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x63,0x10,0x07]
- vpcmpistri $7, (%rax), %xmm10
-
-// CHECK: vpcmpestri $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0x61,0xd4,0x07]
- vpcmpestri $7, %xmm12, %xmm10
-
-// CHECK: vpcmpestri $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0x61,0x10,0x07]
- vpcmpestri $7, (%rax), %xmm10
-
-// CHECK: vaesimc %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x42,0x79,0xdb,0xd4]
- vaesimc %xmm12, %xmm10
-
-// CHECK: vaesimc (%rax), %xmm12
-// CHECK: encoding: [0xc4,0x62,0x79,0xdb,0x20]
- vaesimc (%rax), %xmm12
-
-// CHECK: vaesenc %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xdc,0xdc]
- vaesenc %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesenc (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xdc,0x28]
- vaesenc (%rax), %xmm10, %xmm13
-
-// CHECK: vaesenclast %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xdd,0xdc]
- vaesenclast %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesenclast (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xdd,0x28]
- vaesenclast (%rax), %xmm10, %xmm13
-
-// CHECK: vaesdec %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xde,0xdc]
- vaesdec %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesdec (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xde,0x28]
- vaesdec (%rax), %xmm10, %xmm13
-
-// CHECK: vaesdeclast %xmm12, %xmm10, %xmm11
-// CHECK: encoding: [0xc4,0x42,0x29,0xdf,0xdc]
- vaesdeclast %xmm12, %xmm10, %xmm11
-
-// CHECK: vaesdeclast (%rax), %xmm10, %xmm13
-// CHECK: encoding: [0xc4,0x62,0x29,0xdf,0x28]
- vaesdeclast (%rax), %xmm10, %xmm13
-
-// CHECK: vaeskeygenassist $7, %xmm12, %xmm10
-// CHECK: encoding: [0xc4,0x43,0x79,0xdf,0xd4,0x07]
- vaeskeygenassist $7, %xmm12, %xmm10
-
-// CHECK: vaeskeygenassist $7, (%rax), %xmm10
-// CHECK: encoding: [0xc4,0x63,0x79,0xdf,0x10,0x07]
- vaeskeygenassist $7, (%rax), %xmm10
-
-// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08]
- vcmpeq_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09]
- vcmpngeps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a]
- vcmpngtps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b]
- vcmpfalseps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c]
- vcmpneq_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d]
- vcmpgeps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e]
- vcmpgtps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f]
- vcmptrueps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10]
- vcmpeq_osps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11]
- vcmplt_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12]
- vcmple_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13]
- vcmpunord_sps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14]
- vcmpneq_usps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15]
- vcmpnlt_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16]
- vcmpnle_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17]
- vcmpord_sps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18]
- vcmpeq_usps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19]
- vcmpnge_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a]
- vcmpngt_uqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b]
- vcmpfalse_osps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c]
- vcmpneq_osps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d]
- vcmpge_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e]
- vcmpgt_oqps %xmm11, %xmm12, %xmm13
-
-// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13
-// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f]
- vcmptrue_usps %xmm11, %xmm12, %xmm13
-
-// CHECK: vmovaps (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x28,0x20]
- vmovaps (%rax), %ymm12
-
-// CHECK: vmovaps %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x28,0xe3]
- vmovaps %ymm11, %ymm12
-
-// CHECK: vmovaps %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7c,0x29,0x18]
- vmovaps %ymm11, (%rax)
-
-// CHECK: vmovapd (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7d,0x28,0x20]
- vmovapd (%rax), %ymm12
-
-// CHECK: vmovapd %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7d,0x28,0xe3]
- vmovapd %ymm11, %ymm12
-
-// CHECK: vmovapd %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0x29,0x18]
- vmovapd %ymm11, (%rax)
-
-// CHECK: vmovups (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x10,0x20]
- vmovups (%rax), %ymm12
-
-// CHECK: vmovups %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x10,0xe3]
- vmovups %ymm11, %ymm12
-
-// CHECK: vmovups %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7c,0x11,0x18]
- vmovups %ymm11, (%rax)
-
-// CHECK: vmovupd (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7d,0x10,0x20]
- vmovupd (%rax), %ymm12
-
-// CHECK: vmovupd %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7d,0x10,0xe3]
- vmovupd %ymm11, %ymm12
-
-// CHECK: vmovupd %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0x11,0x18]
- vmovupd %ymm11, (%rax)
-
-// CHECK: vunpckhps %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1c,0x15,0xe3]
- vunpckhps %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpckhpd %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1d,0x15,0xe3]
- vunpckhpd %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpcklps %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1c,0x14,0xe3]
- vunpcklps %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpcklpd %ymm11, %ymm12, %ymm4
-// CHECK: encoding: [0xc4,0xc1,0x1d,0x14,0xe3]
- vunpcklpd %ymm11, %ymm12, %ymm4
-
-// CHECK: vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x15,0x54,0xcb,0xfc]
- vunpckhps -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x15,0x54,0xcb,0xfc]
- vunpckhpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x14,0x54,0xcb,0xfc]
- vunpcklps -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x14,0x54,0xcb,0xfc]
- vunpcklpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vmovntdq %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0xe7,0x18]
- vmovntdq %ymm11, (%rax)
-
-// CHECK: vmovntpd %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7d,0x2b,0x18]
- vmovntpd %ymm11, (%rax)
-
-// CHECK: vmovntps %ymm11, (%rax)
-// CHECK: encoding: [0xc5,0x7c,0x2b,0x18]
- vmovntps %ymm11, (%rax)
-
-// CHECK: vmovmskps %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x78,0x50,0xc4]
- vmovmskps %xmm12, %eax
-
-// CHECK: vmovmskpd %xmm12, %eax
-// CHECK: encoding: [0xc4,0xc1,0x79,0x50,0xc4]
- vmovmskpd %xmm12, %eax
-
-// CHECK: vmaxps %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5f,0xf4]
- vmaxps %ymm12, %ymm4, %ymm6
-
-// CHECK: vmaxpd %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5f,0xf4]
- vmaxpd %ymm12, %ymm4, %ymm6
-
-// CHECK: vminps %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5d,0xf4]
- vminps %ymm12, %ymm4, %ymm6
-
-// CHECK: vminpd %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5d,0xf4]
- vminpd %ymm12, %ymm4, %ymm6
-
-// CHECK: vsubps %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5c,0xf4]
- vsubps %ymm12, %ymm4, %ymm6
-
-// CHECK: vsubpd %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5c,0xf4]
- vsubpd %ymm12, %ymm4, %ymm6
-
-// CHECK: vdivps %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x5e,0xf4]
- vdivps %ymm12, %ymm4, %ymm6
-
-// CHECK: vdivpd %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x5e,0xf4]
- vdivpd %ymm12, %ymm4, %ymm6
-
-// CHECK: vaddps %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x58,0xf4]
- vaddps %ymm12, %ymm4, %ymm6
-
-// CHECK: vaddpd %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x58,0xf4]
- vaddpd %ymm12, %ymm4, %ymm6
-
-// CHECK: vmulps %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5c,0x59,0xf4]
- vmulps %ymm12, %ymm4, %ymm6
-
-// CHECK: vmulpd %ymm12, %ymm4, %ymm6
-// CHECK: encoding: [0xc4,0xc1,0x5d,0x59,0xf4]
- vmulpd %ymm12, %ymm4, %ymm6
-
-// CHECK: vmaxps (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5f,0x30]
- vmaxps (%rax), %ymm4, %ymm6
-
-// CHECK: vmaxpd (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5f,0x30]
- vmaxpd (%rax), %ymm4, %ymm6
-
-// CHECK: vminps (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5d,0x30]
- vminps (%rax), %ymm4, %ymm6
-
-// CHECK: vminpd (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5d,0x30]
- vminpd (%rax), %ymm4, %ymm6
-
-// CHECK: vsubps (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5c,0x30]
- vsubps (%rax), %ymm4, %ymm6
-
-// CHECK: vsubpd (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5c,0x30]
- vsubpd (%rax), %ymm4, %ymm6
-
-// CHECK: vdivps (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x5e,0x30]
- vdivps (%rax), %ymm4, %ymm6
-
-// CHECK: vdivpd (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x5e,0x30]
- vdivpd (%rax), %ymm4, %ymm6
-
-// CHECK: vaddps (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x58,0x30]
- vaddps (%rax), %ymm4, %ymm6
-
-// CHECK: vaddpd (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x58,0x30]
- vaddpd (%rax), %ymm4, %ymm6
-
-// CHECK: vmulps (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdc,0x59,0x30]
- vmulps (%rax), %ymm4, %ymm6
-
-// CHECK: vmulpd (%rax), %ymm4, %ymm6
-// CHECK: encoding: [0xc5,0xdd,0x59,0x30]
- vmulpd (%rax), %ymm4, %ymm6
-
-// CHECK: vsqrtpd %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7d,0x51,0xe3]
- vsqrtpd %ymm11, %ymm12
-
-// CHECK: vsqrtpd (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7d,0x51,0x20]
- vsqrtpd (%rax), %ymm12
-
-// CHECK: vsqrtps %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x51,0xe3]
- vsqrtps %ymm11, %ymm12
-
-// CHECK: vsqrtps (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x51,0x20]
- vsqrtps (%rax), %ymm12
-
-// CHECK: vrsqrtps %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x52,0xe3]
- vrsqrtps %ymm11, %ymm12
-
-// CHECK: vrsqrtps (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x52,0x20]
- vrsqrtps (%rax), %ymm12
-
-// CHECK: vrcpps %ymm11, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x53,0xe3]
- vrcpps %ymm11, %ymm12
-
-// CHECK: vrcpps (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x53,0x20]
- vrcpps (%rax), %ymm12
-
-// CHECK: vandps %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x54,0xdc]
- vandps %ymm12, %ymm14, %ymm11
-
-// CHECK: vandpd %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x54,0xdc]
- vandpd %ymm12, %ymm14, %ymm11
-
-// CHECK: vandps -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x54,0x54,0xcb,0xfc]
- vandps -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x54,0x54,0xcb,0xfc]
- vandpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vorps %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x56,0xdc]
- vorps %ymm12, %ymm14, %ymm11
-
-// CHECK: vorpd %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x56,0xdc]
- vorpd %ymm12, %ymm14, %ymm11
-
-// CHECK: vorps -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x56,0x54,0xcb,0xfc]
- vorps -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x56,0x54,0xcb,0xfc]
- vorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vxorps %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x57,0xdc]
- vxorps %ymm12, %ymm14, %ymm11
-
-// CHECK: vxorpd %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x57,0xdc]
- vxorpd %ymm12, %ymm14, %ymm11
-
-// CHECK: vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x57,0x54,0xcb,0xfc]
- vxorps -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x57,0x54,0xcb,0xfc]
- vxorpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vandnps %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0c,0x55,0xdc]
- vandnps %ymm12, %ymm14, %ymm11
-
-// CHECK: vandnpd %ymm12, %ymm14, %ymm11
-// CHECK: encoding: [0xc4,0x41,0x0d,0x55,0xdc]
- vandnpd %ymm12, %ymm14, %ymm11
-
-// CHECK: vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1c,0x55,0x54,0xcb,0xfc]
- vandnps -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-// CHECK: encoding: [0xc5,0x1d,0x55,0x54,0xcb,0xfc]
- vandnpd -4(%rbx,%rcx,8), %ymm12, %ymm10
-
-// CHECK: vcvtps2pd %xmm13, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7c,0x5a,0xe5]
- vcvtps2pd %xmm13, %ymm12
-
-// CHECK: vcvtps2pd (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x5a,0x20]
- vcvtps2pd (%rax), %ymm12
-
-// CHECK: vcvtdq2pd %xmm13, %ymm12
-// CHECK: encoding: [0xc4,0x41,0x7e,0xe6,0xe5]
- vcvtdq2pd %xmm13, %ymm12
-
-// CHECK: vcvtdq2pd (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7e,0xe6,0x20]
- vcvtdq2pd (%rax), %ymm12
-
-// CHECK: vcvtdq2ps %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7c,0x5b,0xd4]
- vcvtdq2ps %ymm12, %ymm10
-
-// CHECK: vcvtdq2ps (%rax), %ymm12
-// CHECK: encoding: [0xc5,0x7c,0x5b,0x20]
- vcvtdq2ps (%rax), %ymm12
-
-// CHECK: vcvtps2dq %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7d,0x5b,0xd4]
- vcvtps2dq %ymm12, %ymm10
-
-// CHECK: vcvtps2dq (%rax), %ymm10
-// CHECK: encoding: [0xc5,0x7d,0x5b,0x10]
- vcvtps2dq (%rax), %ymm10
-
-// CHECK: vcvttps2dq %ymm12, %ymm10
-// CHECK: encoding: [0xc4,0x41,0x7e,0x5b,0xd4]
- vcvttps2dq %ymm12, %ymm10
-
-// CHECK: vcvttps2dq (%rax), %ymm10
-// CHECK: encoding: [0xc5,0x7e,0x5b,0x10]
- vcvttps2dq (%rax), %ymm10
-
-// CHECK: vcvttpd2dq %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
- vcvttpd2dq %xmm11, %xmm10
-
-// CHECK: vcvttpd2dq %ymm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xd4]
- vcvttpd2dq %ymm12, %xmm10
-
-// CHECK: vcvttpd2dqx %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x79,0xe6,0xd3]
- vcvttpd2dqx %xmm11, %xmm10
-
-// CHECK: vcvttpd2dqx (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x79,0xe6,0x18]
- vcvttpd2dqx (%rax), %xmm11
-
-// CHECK: vcvttpd2dqy %ymm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7d,0xe6,0xdc]
- vcvttpd2dqy %ymm12, %xmm11
-
-// CHECK: vcvttpd2dqy (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7d,0xe6,0x18]
- vcvttpd2dqy (%rax), %xmm11
-
-// CHECK: vcvtpd2ps %ymm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xd4]
- vcvtpd2ps %ymm12, %xmm10
-
-// CHECK: vcvtpd2psx %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x79,0x5a,0xd3]
- vcvtpd2psx %xmm11, %xmm10
-
-// CHECK: vcvtpd2psx (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x79,0x5a,0x18]
- vcvtpd2psx (%rax), %xmm11
-
-// CHECK: vcvtpd2psy %ymm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7d,0x5a,0xdc]
- vcvtpd2psy %ymm12, %xmm11
-
-// CHECK: vcvtpd2psy (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7d,0x5a,0x18]
- vcvtpd2psy (%rax), %xmm11
-
-// CHECK: vcvtpd2dq %ymm12, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xd4]
- vcvtpd2dq %ymm12, %xmm10
-
-// CHECK: vcvtpd2dqy %ymm12, %xmm11
-// CHECK: encoding: [0xc4,0x41,0x7f,0xe6,0xdc]
- vcvtpd2dqy %ymm12, %xmm11
-
-// CHECK: vcvtpd2dqy (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7f,0xe6,0x18]
- vcvtpd2dqy (%rax), %xmm11
-
-// CHECK: vcvtpd2dqx %xmm11, %xmm10
-// CHECK: encoding: [0xc4,0x41,0x7b,0xe6,0xd3]
- vcvtpd2dqx %xmm11, %xmm10
-
-// CHECK: vcvtpd2dqx (%rax), %xmm11
-// CHECK: encoding: [0xc5,0x7b,0xe6,0x18]
- vcvtpd2dqx (%rax), %xmm11
-
-// CHECK: vcmpps $0, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x00]
- vcmpeqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $2, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x02]
- vcmpleps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $1, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x01]
- vcmpltps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $4, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x04]
- vcmpneqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $6, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x06]
- vcmpnleps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $5, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x05]
- vcmpnltps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $7, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x07]
- vcmpordps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $3, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x03]
- vcmpunordps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x00]
- vcmpeqps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x02]
- vcmpleps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x01]
- vcmpltps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x04]
- vcmpneqps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x06]
- vcmpnleps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x05]
- vcmpnltps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
-// CHECK: encoding: [0xc5,0x4c,0xc2,0x64,0xcb,0xfc,0x07]
- vcmpordps -4(%rbx,%rcx,8), %ymm6, %ymm12
-
-// CHECK: vcmpps $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1c,0xc2,0x6c,0xcb,0xfc,0x03]
- vcmpunordps -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd $0, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x00]
- vcmpeqpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $2, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x02]
- vcmplepd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $1, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x01]
- vcmpltpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $4, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x04]
- vcmpneqpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $6, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x06]
- vcmpnlepd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $5, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x05]
- vcmpnltpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $7, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x07]
- vcmpordpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $3, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1d,0xc2,0xeb,0x03]
- vcmpunordpd %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmppd $0, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x00]
- vcmpeqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd $2, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x02]
- vcmplepd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd $1, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x01]
- vcmpltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd $4, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x04]
- vcmpneqpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd $6, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x06]
- vcmpnlepd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd $5, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x05]
- vcmpnltpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmppd $7, -4(%rbx,%rcx,8), %ymm6, %ymm12
-// CHECK: encoding: [0xc5,0x4d,0xc2,0x64,0xcb,0xfc,0x07]
- vcmpordpd -4(%rbx,%rcx,8), %ymm6, %ymm12
-
-// CHECK: vcmppd $3, -4(%rbx,%rcx,8), %ymm12, %ymm13
-// CHECK: encoding: [0xc5,0x1d,0xc2,0x6c,0xcb,0xfc,0x03]
- vcmpunordpd -4(%rbx,%rcx,8), %ymm12, %ymm13
-
-// CHECK: vcmpps $8, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x08]
- vcmpeq_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $9, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x09]
- vcmpngeps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $10, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0a]
- vcmpngtps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $11, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0b]
- vcmpfalseps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $12, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0c]
- vcmpneq_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $13, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0d]
- vcmpgeps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $14, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0e]
- vcmpgtps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $15, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x0f]
- vcmptrueps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $16, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x10]
- vcmpeq_osps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $17, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x11]
- vcmplt_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $18, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x12]
- vcmple_oqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $19, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x13]
- vcmpunord_sps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $20, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x14]
- vcmpneq_usps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $21, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x15]
- vcmpnlt_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $22, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x16]
- vcmpnle_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $23, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x17]
- vcmpord_sps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $24, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x18]
- vcmpeq_usps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $25, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x19]
- vcmpnge_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $26, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1a]
- vcmpngt_uqps %ymm11, %ymm12, %ymm13
-
-// CHECK: vcmpps $27, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1b]
- vcmpfalse_osps %ymm11, %ymm12, %ymm13
+// CHECK: movq 57005(,%riz), %rbx
+// CHECK: encoding: [0x48,0x8b,0x1c,0x25,0xad,0xde,0x00,0x00]
+ movq 57005(,%riz), %rbx
-// CHECK: vcmpps $28, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1c]
- vcmpneq_osps %ymm11, %ymm12, %ymm13
+// CHECK: movq 48879(,%riz), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0x25,0xef,0xbe,0x00,0x00]
+ movq 48879(,%riz), %rax
-// CHECK: vcmpps $29, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1d]
- vcmpge_oqps %ymm11, %ymm12, %ymm13
+// CHECK: movq -4(,%riz,8), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0xe5,0xfc,0xff,0xff,0xff]
+ movq -4(,%riz,8), %rax
-// CHECK: vcmpps $30, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1e]
- vcmpgt_oqps %ymm11, %ymm12, %ymm13
+// CHECK: movq (%rcx,%riz), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0x21]
+ movq (%rcx,%riz), %rax
-// CHECK: vcmpps $31, %ymm11, %ymm12, %ymm13
-// CHECK: encoding: [0xc4,0x41,0x1c,0xc2,0xeb,0x1f]
- vcmptrue_usps %ymm11, %ymm12, %ymm13
+// CHECK: movq (%rcx,%riz,8), %rax
+// CHECK: encoding: [0x48,0x8b,0x04,0xe1]
+ movq (%rcx,%riz,8), %rax
diff --git a/test/MC/AsmParser/X86/x86_64-fma3-encoding.s b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s
new file mode 100644
index 000000000000..d08a7329a09f
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_64-fma3-encoding.s
@@ -0,0 +1,674 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc]
+ vfmadd132pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18]
+ vfmadd132pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc]
+ vfmadd132ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18]
+ vfmadd132ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc]
+ vfmadd213pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18]
+ vfmadd213pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc]
+ vfmadd213ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18]
+ vfmadd213ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc]
+ vfmadd231pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18]
+ vfmadd231pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc]
+ vfmadd231ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18]
+ vfmadd231ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc]
+ vfmadd132pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18]
+ vfmadd132pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc]
+ vfmadd132ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18]
+ vfmadd132ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc]
+ vfmadd213pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18]
+ vfmadd213pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc]
+ vfmadd213ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18]
+ vfmadd213ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc]
+ vfmadd231pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18]
+ vfmadd231pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc]
+ vfmadd231ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18]
+ vfmadd231ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd132pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x98,0xdc]
+ vfmadd132pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x98,0x18]
+ vfmadd132pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x98,0xdc]
+ vfmadd132ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd132ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x98,0x18]
+ vfmadd132ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa8,0xdc]
+ vfmadd213pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa8,0x18]
+ vfmadd213pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd213ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa8,0xdc]
+ vfmadd213ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd213ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa8,0x18]
+ vfmadd213ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb8,0xdc]
+ vfmadd231pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb8,0x18]
+ vfmadd231pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd231ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb8,0xdc]
+ vfmadd231ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmadd231ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb8,0x18]
+ vfmadd231ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub132pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x96,0xdc]
+ vfmaddsub132pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub132pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x96,0x18]
+ vfmaddsub132pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub132ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x96,0xdc]
+ vfmaddsub132ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub132ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x96,0x18]
+ vfmaddsub132ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub213pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa6,0xdc]
+ vfmaddsub213pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub213pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa6,0x18]
+ vfmaddsub213pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub213ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa6,0xdc]
+ vfmaddsub213ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub213ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa6,0x18]
+ vfmaddsub213ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub231pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb6,0xdc]
+ vfmaddsub231pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub231pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb6,0x18]
+ vfmaddsub231pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmaddsub231ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb6,0xdc]
+ vfmaddsub231ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmaddsub231ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb6,0x18]
+ vfmaddsub231ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd132pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x97,0xdc]
+ vfmsubadd132pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd132pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x97,0x18]
+ vfmsubadd132pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd132ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x97,0xdc]
+ vfmsubadd132ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd132ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x97,0x18]
+ vfmsubadd132ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd213pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xa7,0xdc]
+ vfmsubadd213pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd213pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xa7,0x18]
+ vfmsubadd213pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd213ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xa7,0xdc]
+ vfmsubadd213ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd213ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xa7,0x18]
+ vfmsubadd213ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd231pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xb7,0xdc]
+ vfmsubadd231pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd231pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xb7,0x18]
+ vfmsubadd231pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsubadd231ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xb7,0xdc]
+ vfmsubadd231ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsubadd231ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xb7,0x18]
+ vfmsubadd231ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub132pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x9a,0xdc]
+ vfmsub132pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub132pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x9a,0x18]
+ vfmsub132pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub132ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x9a,0xdc]
+ vfmsub132ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub132ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x9a,0x18]
+ vfmsub132ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub213pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xaa,0xdc]
+ vfmsub213pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub213pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xaa,0x18]
+ vfmsub213pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub213ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xaa,0xdc]
+ vfmsub213ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub213ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xaa,0x18]
+ vfmsub213ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub231pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xba,0xdc]
+ vfmsub231pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub231pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xba,0x18]
+ vfmsub231pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfmsub231ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xba,0xdc]
+ vfmsub231ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfmsub231ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xba,0x18]
+ vfmsub231ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd132pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x9c,0xdc]
+ vfnmadd132pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd132pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x9c,0x18]
+ vfnmadd132pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd132ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x9c,0xdc]
+ vfnmadd132ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd132ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x9c,0x18]
+ vfnmadd132ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd213pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xac,0xdc]
+ vfnmadd213pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd213pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xac,0x18]
+ vfnmadd213pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd213ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xac,0xdc]
+ vfnmadd213ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd213ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xac,0x18]
+ vfnmadd213ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd231pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xbc,0xdc]
+ vfnmadd231pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd231pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xbc,0x18]
+ vfnmadd231pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmadd231ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xbc,0xdc]
+ vfnmadd231ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmadd231ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xbc,0x18]
+ vfnmadd231ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub132pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0x9e,0xdc]
+ vfnmsub132pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub132pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0x9e,0x18]
+ vfnmsub132pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub132ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0x9e,0xdc]
+ vfnmsub132ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub132ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0x9e,0x18]
+ vfnmsub132ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub213pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xae,0xdc]
+ vfnmsub213pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub213pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xae,0x18]
+ vfnmsub213pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub213ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xae,0xdc]
+ vfnmsub213ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub213ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xae,0x18]
+ vfnmsub213ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub231pd %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0xa9,0xbe,0xdc]
+ vfnmsub231pd %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub231pd (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0xa9,0xbe,0x18]
+ vfnmsub231pd (%rax), %xmm10, %xmm11
+
+// CHECK: vfnmsub231ps %xmm12, %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x29,0xbe,0xdc]
+ vfnmsub231ps %xmm12, %xmm10, %xmm11
+
+// CHECK: vfnmsub231ps (%rax), %xmm10, %xmm11
+// CHECK: encoding: [0xc4,0x62,0x29,0xbe,0x18]
+ vfnmsub231ps (%rax), %xmm10, %xmm11
+
+// CHECK: vfmadd132pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x98,0xdc]
+ vfmadd132pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x98,0x18]
+ vfmadd132pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd132ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x98,0xdc]
+ vfmadd132ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd132ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x98,0x18]
+ vfmadd132ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa8,0xdc]
+ vfmadd213pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa8,0x18]
+ vfmadd213pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd213ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa8,0xdc]
+ vfmadd213ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd213ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa8,0x18]
+ vfmadd213ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb8,0xdc]
+ vfmadd231pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb8,0x18]
+ vfmadd231pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmadd231ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb8,0xdc]
+ vfmadd231ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmadd231ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb8,0x18]
+ vfmadd231ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub132pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x96,0xdc]
+ vfmaddsub132pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub132pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x96,0x18]
+ vfmaddsub132pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub132ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x96,0xdc]
+ vfmaddsub132ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub132ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x96,0x18]
+ vfmaddsub132ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub213pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa6,0xdc]
+ vfmaddsub213pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub213pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa6,0x18]
+ vfmaddsub213pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub213ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa6,0xdc]
+ vfmaddsub213ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub213ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa6,0x18]
+ vfmaddsub213ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub231pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb6,0xdc]
+ vfmaddsub231pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub231pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb6,0x18]
+ vfmaddsub231pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmaddsub231ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb6,0xdc]
+ vfmaddsub231ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmaddsub231ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb6,0x18]
+ vfmaddsub231ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd132pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x97,0xdc]
+ vfmsubadd132pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd132pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x97,0x18]
+ vfmsubadd132pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd132ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x97,0xdc]
+ vfmsubadd132ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd132ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x97,0x18]
+ vfmsubadd132ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd213pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xa7,0xdc]
+ vfmsubadd213pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd213pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xa7,0x18]
+ vfmsubadd213pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd213ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xa7,0xdc]
+ vfmsubadd213ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd213ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xa7,0x18]
+ vfmsubadd213ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd231pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xb7,0xdc]
+ vfmsubadd231pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd231pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xb7,0x18]
+ vfmsubadd231pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsubadd231ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xb7,0xdc]
+ vfmsubadd231ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsubadd231ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xb7,0x18]
+ vfmsubadd231ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub132pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x9a,0xdc]
+ vfmsub132pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub132pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x9a,0x18]
+ vfmsub132pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub132ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x9a,0xdc]
+ vfmsub132ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub132ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x9a,0x18]
+ vfmsub132ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub213pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xaa,0xdc]
+ vfmsub213pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub213pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xaa,0x18]
+ vfmsub213pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub213ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xaa,0xdc]
+ vfmsub213ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub213ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xaa,0x18]
+ vfmsub213ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub231pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xba,0xdc]
+ vfmsub231pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub231pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xba,0x18]
+ vfmsub231pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfmsub231ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xba,0xdc]
+ vfmsub231ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfmsub231ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xba,0x18]
+ vfmsub231ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd132pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x9c,0xdc]
+ vfnmadd132pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd132pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x9c,0x18]
+ vfnmadd132pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd132ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x9c,0xdc]
+ vfnmadd132ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd132ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x9c,0x18]
+ vfnmadd132ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd213pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xac,0xdc]
+ vfnmadd213pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd213pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xac,0x18]
+ vfnmadd213pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd213ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xac,0xdc]
+ vfnmadd213ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd213ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xac,0x18]
+ vfnmadd213ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd231pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xbc,0xdc]
+ vfnmadd231pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd231pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xbc,0x18]
+ vfnmadd231pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmadd231ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xbc,0xdc]
+ vfnmadd231ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmadd231ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xbc,0x18]
+ vfnmadd231ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub132pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0x9e,0xdc]
+ vfnmsub132pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub132pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0x9e,0x18]
+ vfnmsub132pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub132ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0x9e,0xdc]
+ vfnmsub132ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub132ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0x9e,0x18]
+ vfnmsub132ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub213pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xae,0xdc]
+ vfnmsub213pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub213pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xae,0x18]
+ vfnmsub213pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub213ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xae,0xdc]
+ vfnmsub213ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub213ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xae,0x18]
+ vfnmsub213ps (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub231pd %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0xad,0xbe,0xdc]
+ vfnmsub231pd %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub231pd (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0xad,0xbe,0x18]
+ vfnmsub231pd (%rax), %ymm10, %ymm11
+
+// CHECK: vfnmsub231ps %ymm12, %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x42,0x2d,0xbe,0xdc]
+ vfnmsub231ps %ymm12, %ymm10, %ymm11
+
+// CHECK: vfnmsub231ps (%rax), %ymm10, %ymm11
+// CHECK: encoding: [0xc4,0x62,0x2d,0xbe,0x18]
+ vfnmsub231ps (%rax), %ymm10, %ymm11
+
diff --git a/test/MC/AsmParser/X86/x86_64-new-encoder.s b/test/MC/AsmParser/X86/x86_64-new-encoder.s
index 1858441870ac..9f94d8404f42 100644
--- a/test/MC/AsmParser/X86/x86_64-new-encoder.s
+++ b/test/MC/AsmParser/X86/x86_64-new-encoder.s
@@ -72,9 +72,9 @@ stosl
// Not moffset forms of moves, they are x86-32 only! rdar://7947184
-movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,A,A,A,A]
-movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,A,A,A,A]
-movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,A,A,A,A]
+movb 0, %al // CHECK: movb 0, %al # encoding: [0x8a,0x04,0x25,0x00,0x00,0x00,0x00]
+movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0x8b,0x04,0x25,0x00,0x00,0x00,0x00]
+movl 0, %eax // CHECK: movl 0, %eax # encoding: [0x8b,0x04,0x25,0x00,0x00,0x00,0x00]
// CHECK: pushfq # encoding: [0x9c]
pushf
@@ -150,3 +150,10 @@ btq $0x01,%rdx
// CHECK: [0x65,0x8b,0x04,0x25,0x7c,0x00,0x00,0x00]
movl %gs:124, %eax
+// CHECK: jmpq *8(%rax)
+// CHECK: encoding: [0xff,0x60,0x08]
+ jmp *8(%rax)
+
+// CHECK: btq $61, -216(%rbp)
+// CHECK: encoding: [0x48,0x0f,0xba,0xa5,0x28,0xff,0xff,0xff,0x3d]
+ btq $61, -216(%rbp)
diff --git a/test/MC/AsmParser/X86/x86_instruction_errors.s b/test/MC/AsmParser/X86/x86_instruction_errors.s
new file mode 100644
index 000000000000..183306be2c11
--- /dev/null
+++ b/test/MC/AsmParser/X86/x86_instruction_errors.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown %s 2> %t.err
+// RUN: FileCheck < %t.err %s
+
+// CHECK: error: ambiguous instructions require an explicit suffix (could be 'cmpb', 'cmpw', 'cmpl', or 'cmpq')
+cmp $0, 0(%eax)
diff --git a/test/MC/AsmParser/X86/x86_instructions.s b/test/MC/AsmParser/X86/x86_instructions.s
index 4bc8a4bb3a84..a82d2a1c0d41 100644
--- a/test/MC/AsmParser/X86/x86_instructions.s
+++ b/test/MC/AsmParser/X86/x86_instructions.s
@@ -1,4 +1,6 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s > %t 2> %t.err
+// RUN: FileCheck < %t %s
+// RUN: FileCheck --check-prefix=CHECK-STDERR < %t.err %s
// CHECK: subb %al, %al
subb %al, %al
@@ -56,7 +58,7 @@
subl %eax, %ebx
// FIXME: Check that this matches the correct instruction.
-// CHECK: call *%rax
+// CHECK: callq *%rax
call *%rax
// FIXME: Check that this matches the correct instruction.
@@ -151,3 +153,23 @@ fadd %st(7)
// CHECK: int3
INT3
+
+// Allow scale factor without index register.
+// CHECK: movaps %xmm3, (%esi)
+// CHECK-STDERR: warning: scale factor without index register is ignored
+movaps %xmm3, (%esi, 2)
+
+// CHECK: imull $12, %eax, %eax
+imul $12, %eax
+
+// CHECK: imull %ecx, %eax
+imull %ecx, %eax
+
+// PR8114
+// CHECK: outb %al, %dx
+// CHECK: outw %ax, %dx
+// CHECK: outl %eax, %dx
+
+out %al, (%dx)
+out %ax, (%dx)
+outl %eax, (%dx)
diff --git a/test/MC/AsmParser/X86/x86_operands.s b/test/MC/AsmParser/X86/x86_operands.s
index bf958d8478ca..ddadf7931895 100644
--- a/test/MC/AsmParser/X86/x86_operands.s
+++ b/test/MC/AsmParser/X86/x86_operands.s
@@ -1,5 +1,3 @@
-// FIXME: Actually test that we get the expected results.
-
// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
# Immediates
diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp
index 64cb75b20ff1..a6d81da5b716 100644
--- a/test/MC/AsmParser/dg.exp
+++ b/test/MC/AsmParser/dg.exp
@@ -1,4 +1,5 @@
load_lib llvm.exp
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
+}
diff --git a/test/MC/AsmParser/directive_abort.s b/test/MC/AsmParser/directive_abort.s
index 3eb8e96f2f88..1fd1f6e44a32 100644
--- a/test/MC/AsmParser/directive_abort.s
+++ b/test/MC/AsmParser/directive_abort.s
@@ -1,6 +1,6 @@
# RUN: llvm-mc -triple i386-unknown-unknown %s 2> %t
# RUN: FileCheck -input-file %t %s
-# CHECK: .abort "please stop assembing"
-TEST0:
- .abort "please stop assembing"
+# CHECK: error: .abort 'please stop assembing'
+TEST0:
+ .abort please stop assembing
diff --git a/test/MC/AsmParser/directive_elf_size.s b/test/MC/AsmParser/directive_elf_size.s
new file mode 100644
index 000000000000..af35ae07ed6c
--- /dev/null
+++ b/test/MC/AsmParser/directive_elf_size.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc -triple i386-pc-linux-gnu %s | FileCheck %s
+
+a:
+ ret
+.Lt:
+# CHECK: .size a, .Lt-a
+ .size a, .Lt-a
+
diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s
index beac69a4aeb1..c7617a337e02 100644
--- a/test/MC/AsmParser/directive_values.s
+++ b/test/MC/AsmParser/directive_values.s
@@ -19,3 +19,20 @@ TEST2:
# CHECK: .quad 9
TEST3:
.quad 9
+
+
+# rdar://7997827
+TEST4:
+ .quad 0b0100
+ .quad 4294967295
+ .quad 4294967295+1
+ .quad 4294967295LL+1
+ .quad 0b10LL + 07ULL + 0x42AULL
+# CHECK: TEST4
+# CHECK: .quad 4
+# CHECK: .quad 4294967295
+# CHECK: .quad 4294967296
+# CHECK: .quad 4294967296
+# CHECK: .quad 1075
+
+
diff --git a/test/MC/AsmParser/dollars-in-identifiers.s b/test/MC/AsmParser/dollars-in-identifiers.s
new file mode 100644
index 000000000000..e56959062ad9
--- /dev/null
+++ b/test/MC/AsmParser/dollars-in-identifiers.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s > %t
+# RUN: FileCheck < %t %s
+
+// CHECK: .globl $foo
+.globl $foo
+// CHECK: .long ($foo)
+.long ($foo)
diff --git a/test/MC/AsmParser/macro-def-in-instantiation.s b/test/MC/AsmParser/macro-def-in-instantiation.s
new file mode 100644
index 000000000000..b6483b3b32b2
--- /dev/null
+++ b/test/MC/AsmParser/macro-def-in-instantiation.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s | FileCheck %s
+
+.macro .make_macro
+$0 $1
+$2 $3
+$4
+.endmacro
+
+.make_macro .macro,.mybyte,.byte,$0,.endmacro
+
+.data
+// CHECK: .byte 10
+.mybyte 10
diff --git a/test/MC/AsmParser/macros-parsing.s b/test/MC/AsmParser/macros-parsing.s
new file mode 100644
index 000000000000..65f64546cc13
--- /dev/null
+++ b/test/MC/AsmParser/macros-parsing.s
@@ -0,0 +1,23 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err
+// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
+
+.macro .test0
+.endmacro
+
+.macros_off
+// CHECK-ERRORS: 9:1: warning: ignoring directive for now
+.test0
+.macros_on
+
+.test0
+
+// CHECK-ERRORS: macro '.test0' is already defined
+.macro .test0
+.endmacro
+
+// CHECK-ERRORS: unexpected '.endmacro' in file
+.endmacro
+
+// CHECK-ERRORS: no matching '.endmacro' in definition
+.macro dummy
+
diff --git a/test/MC/AsmParser/macros.s b/test/MC/AsmParser/macros.s
new file mode 100644
index 000000000000..214274d9aa84
--- /dev/null
+++ b/test/MC/AsmParser/macros.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s 2> %t.err | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
+
+.macro .test0
+.macrobody0
+.endmacro
+.macro .test1
+.test0
+.endmacro
+
+.test1
+// CHECK-ERRORS: <instantiation>:1:1: warning: ignoring directive for now
+// CHECK-ERRORS-NEXT: macrobody0
+// CHECK-ERRORS-NEXT: ^
+// CHECK-ERRORS: <instantiation>:1:1: note: while in macro instantiation
+// CHECK-ERRORS-NEXT: .test0
+// CHECK-ERRORS-NEXT: ^
+// CHECK-ERRORS: 11:1: note: while in macro instantiation
+// CHECK-ERRORS-NEXT: .test1
+// CHECK-ERRORS-NEXT: ^
+
+.macro test2
+.byte $0
+.endmacro
+test2 10
+
+.macro test3
+.globl "$0 $1 $2 $$3 $n"
+.endmacro
+
+// CHECK: .globl "1 23 $3 2"
+test3 1,2 3
+
+.macro test4
+.globl "$0 -- $1"
+.endmacro
+
+// CHECK: .globl "ab)(,) -- (cd)"
+test4 a b)(,),(cd)
diff --git a/test/MC/COFF/basic-coff.ll b/test/MC/COFF/basic-coff.ll
new file mode 100644
index 000000000000..1e67db0e5c42
--- /dev/null
+++ b/test/MC/COFF/basic-coff.ll
@@ -0,0 +1,136 @@
+; This test checks that the COFF object emitter works for the most basic
+; programs.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
+; RUN: coff-dump.py %abs_tmp | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
+
+@.str = private constant [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+; CHECK: {
+; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C)
+; CHECK: NumberOfSections = 2
+; CHECK: TimeDateStamp = {{[0-9]+}}
+; CHECK: PointerToSymbolTable = 0x99
+; CHECK: NumberOfSymbols = 7
+; CHECK: SizeOfOptionalHeader = 0
+; CHECK: Characteristics = 0x0
+; CHECK: Sections = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 21
+; CHECK: PointerToRawData = 0x64
+; CHECK: PointerToRelocations = 0x79
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 2
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0x60500020
+; CHECK: IMAGE_SCN_CNT_CODE
+; CHECK: IMAGE_SCN_ALIGN_16BYTES
+; CHECK: IMAGE_SCN_MEM_EXECUTE
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: SectionData =
+; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 31 |.....$.........1|
+; CHECK: C0 83 C4 04 C3 |.....|
+; CHECK: Relocations = [
+; CHECK: 0 = {
+; CHECK: VirtualAddress = 0x6
+; CHECK: SymbolTableIndex = 5
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _main
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: VirtualAddress = 0xB
+; CHECK: SymbolTableIndex = 6
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = L_.str
+; CHECK: }
+; CHECK: ]
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 12
+; CHECK: PointerToRawData = 0x8D
+; CHECK: PointerToRelocations = 0x0
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 0
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0xC0100040
+; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA
+; CHECK: IMAGE_SCN_ALIGN_1BYTES
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: IMAGE_SCN_MEM_WRITE
+; CHECK: SectionData =
+; CHECK: 48 65 6C 6C 6F 20 57 6F - 72 6C 64 00 |Hello World.|
+; CHECK: Relocations = None
+; CHECK: }
+; CHECK: ]
+; CHECK: Symbols = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 15 00 00 00 02 00 00 00 - 00 00 00 00 01 00 00 00 |................|
+; CHECK: 00 00 |..|
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 0C 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
+; CHECK: 00 00 |..|
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: Name = _main
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: Name = L_.str
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+; CHECK: }
+; CHECK: 4 = {
+; CHECK: Name = _printf
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+; CHECK: }
+; CHECK: ]
+; CHECK: }
diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp
new file mode 100644
index 000000000000..7b7bd4e73807
--- /dev/null
+++ b/test/MC/COFF/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
diff --git a/test/MC/COFF/switch-relocations.ll b/test/MC/COFF/switch-relocations.ll
new file mode 100644
index 000000000000..300c10732ec6
--- /dev/null
+++ b/test/MC/COFF/switch-relocations.ll
@@ -0,0 +1,34 @@
+; The purpose of this test is to see if the COFF object writer can properly
+; relax the fixups that are created for jump tables on x86-64. See PR7960.
+
+; This test case was reduced from Lua/lapi.c.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
+
+define void @lua_gc(i32 %what) nounwind {
+entry:
+ switch i32 %what, label %sw.epilog [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb14
+ i32 4, label %sw.bb18
+ i32 6, label %sw.bb57
+ ]
+
+sw.bb: ; preds = %entry, %entry, %entry
+ ret void
+
+sw.bb14: ; preds = %entry
+ ret void
+
+sw.bb18: ; preds = %entry
+ ret void
+
+sw.bb57: ; preds = %entry
+ ret void
+
+sw.epilog: ; preds = %entry
+ ret void
+}
diff --git a/test/MC/COFF/symbol-fragment-offset.ll b/test/MC/COFF/symbol-fragment-offset.ll
new file mode 100644
index 000000000000..af7ace19d59f
--- /dev/null
+++ b/test/MC/COFF/symbol-fragment-offset.ll
@@ -0,0 +1,182 @@
+; The purpose of this test is to see if the COFF object writer is emitting the
+; proper relocations for multiple pieces of data in a single data fragment.
+
+; RUN: llc -filetype=obj -mtriple i686-pc-win32 %s -o %t
+; RUN: coff-dump.py %abs_tmp | FileCheck %s
+; RUN: llc -filetype=obj -mtriple x86_64-pc-win32 %s -o %t
+
+@.str = private constant [7 x i8] c"Hello \00" ; <[7 x i8]*> [#uses=1]
+@str = internal constant [7 x i8] c"World!\00" ; <[7 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @str, i32 0, i32 0)) ; <i32> [#uses=0]
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+; CHECK: {
+; CHECK: MachineType = IMAGE_FILE_MACHINE_I386 (0x14C)
+; CHECK: NumberOfSections = 2
+; CHECK: TimeDateStamp = {{[0-9]+}}
+; CHECK: PointerToSymbolTable = 0xBB
+; CHECK: NumberOfSymbols = 9
+; CHECK: SizeOfOptionalHeader = 0
+; CHECK: Characteristics = 0x0
+; CHECK: Sections = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 33
+; CHECK: PointerToRawData = 0x64
+; CHECK: PointerToRelocations = 0x85
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 4
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0x60500020
+; CHECK: IMAGE_SCN_CNT_CODE
+; CHECK: IMAGE_SCN_ALIGN_16BYTES
+; CHECK: IMAGE_SCN_MEM_EXECUTE
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: SectionData =
+; CHECK: 83 EC 04 C7 04 24 00 00 - 00 00 E8 00 00 00 00 C7 |.....$..........|
+; CHECK: 04 24 00 00 00 00 E8 00 - 00 00 00 31 C0 83 C4 04 |.$.........1....|
+; CHECK: C3 |.|
+
+; CHECK: Relocations = [
+; CHECK: 0 = {
+; CHECK: VirtualAddress = 0x6
+; CHECK: SymbolTableIndex = 5
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _main
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: VirtualAddress = 0xB
+; CHECK: SymbolTableIndex = 6
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = L_.str
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: VirtualAddress = 0x12
+; CHECK: SymbolTableIndex = 7
+; CHECK: Type = IMAGE_REL_I386_DIR32 (6)
+; CHECK: SymbolName = _printf
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: VirtualAddress = 0x17
+; CHECK: SymbolTableIndex = 8
+; CHECK: Type = IMAGE_REL_I386_REL32 (20)
+; CHECK: SymbolName = _str
+; CHECK: }
+; CHECK: ]
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: VirtualSize = 0
+; CHECK: VirtualAddress = 0
+; CHECK: SizeOfRawData = 14
+; CHECK: PointerToRawData = 0xAD
+; CHECK: PointerToRelocations = 0x0
+; CHECK: PointerToLineNumbers = 0x0
+; CHECK: NumberOfRelocations = 0
+; CHECK: NumberOfLineNumbers = 0
+; CHECK: Charateristics = 0xC0100040
+; CHECK: IMAGE_SCN_CNT_INITIALIZED_DATA
+; CHECK: IMAGE_SCN_ALIGN_1BYTES
+; CHECK: IMAGE_SCN_MEM_READ
+; CHECK: IMAGE_SCN_MEM_WRITE
+; CHECK: SectionData =
+; CHECK: 48 65 6C 6C 6F 20 00 57 - 6F 72 6C 64 21 00 |Hello .World!.|
+
+; CHECK: Relocations = None
+; CHECK: }
+; CHECK: ]
+; CHECK: Symbols = [
+; CHECK: 0 = {
+; CHECK: Name = .text
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 21 00 00 00 04 00 00 00 - 00 00 00 00 01 00 00 00 |!...............|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 1 = {
+; CHECK: Name = .data
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 1
+; CHECK: AuxillaryData =
+; CHECK: 0E 00 00 00 00 00 00 00 - 00 00 00 00 02 00 00 00 |................|
+; CHECK: 00 00 |..|
+
+; CHECK: }
+; CHECK: 2 = {
+; CHECK: Name = _main
+; CHECK: Value = 0
+; CHECK: SectionNumber = 1
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_FUNCTION (2)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 3 = {
+; CHECK: Name = L_.str
+; CHECK: Value = 0
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 4 = {
+; CHECK: Name = _printf
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 5 = {
+; CHECK: Name = _str
+; CHECK: Value = 7
+; CHECK: SectionNumber = 2
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_STATIC (3)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: 6 = {
+; CHECK: Name = _puts
+; CHECK: Value = 0
+; CHECK: SectionNumber = 0
+; CHECK: SimpleType = IMAGE_SYM_TYPE_NULL (0)
+; CHECK: ComplexType = IMAGE_SYM_DTYPE_NULL (0)
+; CHECK: StorageClass = IMAGE_SYM_CLASS_EXTERNAL (2)
+; CHECK: NumberOfAuxSymbols = 0
+; CHECK: AuxillaryData =
+
+; CHECK: }
+; CHECK: ]
+; CHECK: }
diff --git a/test/MC/Disassembler/arm-tests.txt b/test/MC/Disassembler/arm-tests.txt
index a1e229caebf8..0b4c2978fe3d 100644
--- a/test/MC/Disassembler/arm-tests.txt
+++ b/test/MC/Disassembler/arm-tests.txt
@@ -12,9 +12,21 @@
# CHECK: cmn r0, #1
0x01 0x00 0x70 0xe3
+# CHECK: dmb
+0x5f 0xf0 0x7f 0xf5
+
# CHECK: dmb nshst
0x56 0xf0 0x7f 0xf5
+# CHECK: dsb
+0x4f 0xf0 0x7f 0xf5
+
+# CHECK: dsb st
+0x4e 0xf0 0x7f 0xf5
+
+# CHECK: isb
+0x6f 0xf0 0x7f 0xf5
+
# CHECK: ldclvc p5, cr15, [r8], #-0
0x00 0xf5 0x78 0x7c
@@ -42,9 +54,17 @@
# CHECK: mvnpls r7, #245, 2
0xf5 0x71 0xf0 0x53
+# CHECK-NOT: orr r7, r8, r7, rrx #0
+# CHECK: orr r7, r8, r7, rrx
+0x67 0x70 0x88 0xe1
+
# CHECK: pkhbt r8, r9, r10, lsl #4
0x1a 0x82 0x89 0xe6
+# CHECK-NOT: pkhbtls pc, r11, r11, lsl #0
+# CHECK: pkhbtls pc, r11, r11
+0x1b 0xf0 0x8b 0x96
+
# CHECK: pop {r0, r2, r4, r6, r8, r10}
0x55 0x05 0xbd 0xe8
@@ -57,6 +77,14 @@
# CHECK: rfedb r0!
0x00 0x0a 0x30 0xf9
+# CHECK-NOT: rsbeq r0, r2, r0, lsl #0
+# CHECK: rsbeq r0, r2, r0
+0x00 0x00 0x62 0x00
+
+# CHECK-NOT: rsceqs r0, r0, r1, lsl #0
+# CHECK: rsceqs r0, r0, r1
+0x01 0x00 0xf0 0x00
+
# CHECK: sbcs r0, pc, #1
0x01 0x00 0xdf 0xe2
@@ -66,6 +94,10 @@
# CHECK: ssat r8, #1, r10, lsl #8
0x1a 0x84 0xa0 0xe6
+# CHECK-NOT: ssatmi r0, #17, r12, lsl #0
+# CHECK: ssatmi r0, #17, r12
+0x1c 0x00 0xb0 0x46
+
# CHECK: stmdb r10!, {r4, r5, r6, r7, lr}
0xf0 0x40 0x2a 0xe9
@@ -75,3 +107,5 @@
# CHECK: ubfx r0, r0, #16, #1
0x50 0x08 0xe0 0xe7
+# CHECK: usat r8, #0, r10, asr #32
+0x5a 0x80 0xe0 0xe6
diff --git a/test/MC/Disassembler/neon-tests.txt b/test/MC/Disassembler/neon-tests.txt
index 51b31e7c1a6e..826ff2272efa 100644
--- a/test/MC/Disassembler/neon-tests.txt
+++ b/test/MC/Disassembler/neon-tests.txt
@@ -25,6 +25,9 @@
# CHECK: vmov.i64 q6, #0xFF00FF00FF
0x75 0xce 0x81 0xf2
+# CHECK: vmvn.i32 d0, #0x0
+0x30 0x00 0x80 0xf2
+
# CHECK: vmul.f32 d0, d0, d6
0x16 0x0d 0x00 0xf3
diff --git a/test/MC/Disassembler/thumb-tests.txt b/test/MC/Disassembler/thumb-tests.txt
index 14e91295276b..06d12fed87fb 100644
--- a/test/MC/Disassembler/thumb-tests.txt
+++ b/test/MC/Disassembler/thumb-tests.txt
@@ -42,6 +42,10 @@
# CHECK: pkhtb r2, r4, r6, asr #16
0xc4 0xea 0x26 0x42
+# CHECK-NOT: pkhbt r2, r4, r6, lsl #0
+# CHECK: pkhbt r2, r4, r6
+0xc4 0xea 0x06 0x02
+
# CHECK: pop {r2, r4, r6, r8, r10, r12}
0xbd 0xe8 0x54 0x15
@@ -51,6 +55,14 @@
# CHECK: rsbs r0, r0, #0
0x40 0x42
+# CHECK-NOT: rsb r0, r2, r0, lsl #0
+# CHECK: rsb r0, r2, r0
+0xc2 0xeb 0x00 0x00
+
+# CHECK-NOT: ssat r0, #17, r12, lsl #0
+# CHECK: ssat r0, #17, r12
+0x0c 0xf3 0x10 0x00
+
# CHECK: strd r0, [r7, #64]
0xc7 0xe9 0x10 0x01
diff --git a/test/MC/ELF/bss.ll b/test/MC/ELF/bss.ll
new file mode 100644
index 000000000000..5112d2c9b0a5
--- /dev/null
+++ b/test/MC/ELF/bss.ll
@@ -0,0 +1,8 @@
+; RUN: llc -filetype=obj %s -o %t
+; FIXME: Add ELF dumping tool to check results.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+@g0 = global i8* null, align 4 ; <i8**> [#uses=0]
+
diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp
new file mode 100644
index 000000000000..7b7bd4e73807
--- /dev/null
+++ b/test/MC/ELF/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
+}
diff --git a/test/Makefile b/test/Makefile
index f6830e638393..7ca46beccc3f 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -10,11 +10,11 @@
LEVEL = ..
DIRS =
-#
-# Make Dejagnu the default for testing
-#
all:: check-local
+# 'lit' is the default test runner.
+check-local:: check-local-lit
+
# Include other test rules
include Makefile.tests
@@ -84,18 +84,18 @@ else # !SunOS
ifeq ($(HOST_OS),AuroraUX)
ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
else # !AuroraUX
-# Fedora 13 x86-64 python fails with -v 51200
-ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 768000 ;
+# Fedora 13 x86-64 python fails with -v 76800
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ;
endif # AuroraUX
endif # SunOS
ifneq ($(RUNTEST),)
-check-local:: site.exp
+check-local-dg:: site.exp
( $(ULIMIT) \
PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(LLVMGCCDIR)/bin:$(PATH)" \
$(RUNTEST) $(RUNTESTFLAGS) )
else
-check-local:: site.exp
+check-local-dg:: site.exp
@echo "*** dejagnu not found. Make sure 'runtest' is in your PATH, then reconfigure LLVM."
endif
@@ -107,26 +107,6 @@ check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-lit-site-cfgs
( $(ULIMIT) \
$(LLVM_SRC_ROOT)/utils/lit/lit.py $(LIT_ARGS) $(LIT_ALL_TESTSUITES) )
-ifdef TESTONE
-CLEANED_TESTONE := $(patsubst %/,%,$(TESTONE))
-CLEANED_TESTONE := $(patsubst test/%,%,$(CLEANED_TESTONE))
-SUBDIR := $(shell dirname $(CLEANED_TESTONE))
-TESTPATH := $(LLVM_SRC_ROOT)/test/$(CLEANED_TESTONE)
-check-one: site.exp $(TCLSH)
- $(Verb)( echo "source $(LLVM_OBJ_ROOT)/test/site.exp" ; \
- echo "set subdir $(SUBDIR)" ; \
- echo "proc pass { msg } { puts \"PASS: \$$msg\" } "; \
- echo "proc fail { msg } { puts \"FAIL: \$$msg\" }" ; \
- echo "proc xfail { msg } { puts \"XFAIL: \$$msg\" }" ; \
- echo "proc xpass { msg } { puts \"XPASS: \$$msg\" }" ; \
- echo "proc verbose args { }" ; \
- echo "source $(LLVM_SRC_ROOT)/test/lib/llvm.exp" ; \
- echo "RunLLVMTests $(TESTPATH)" ) | \
- ( $(ULIMIT) \
- PATH="$(LLVMToolDir):$(LLVM_SRC_ROOT)/test/Scripts:$(PATH)" \
- $(TCLSH) )
-endif
-
clean::
$(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print`
@@ -166,7 +146,7 @@ site.exp: FORCE
@echo 'set gccpath "$(CC)"' >>site.tmp
@echo 'set gxxpath "$(CXX)"' >>site.tmp
@echo 'set compile_c "' $(CC) $(CPP.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >>site.tmp
- @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >> site.tmp
+ @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c -x c++ '"' >> site.tmp
@echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp
@echo 'set llvmgcc "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp
@echo 'set llvmgxx "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp
@@ -203,6 +183,3 @@ Unit/lit.site.cfg: $(PROJ_OBJ_DIR)/Unit/.dir FORCE
-e "s#@SHLIBPATH_VAR@#$(SHLIBPATH_VAR)#g" \
$(PROJ_SRC_DIR)/Unit/lit.site.cfg.in > $@
-# Daniel hates Chris.
-chris-lit:
- make check-lit LIT_ARGS='-j16 -s'
diff --git a/test/Other/close-stderr.ll b/test/Other/close-stderr.ll
new file mode 100644
index 000000000000..60fab3df0d9f
--- /dev/null
+++ b/test/Other/close-stderr.ll
@@ -0,0 +1,9 @@
+; RUN: sh -c "\
+; RUN: opt --reject-this-option 2>&-; echo \$?; \
+; RUN: opt -o /dev/null /dev/null 2>&-; echo \$?; \
+; RUN: " | FileCheck %s
+; CHECK: {{^1$}}
+; CHECK: {{^0$}}
+
+; Test that the error handling when writing to stderr fails exits the
+; program cleanly rather than aborting.
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index ecef9c48492e..926bdbc1b464 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -71,8 +71,6 @@
; PLAIN: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
; PLAIN: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64)
; PLAIN: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64)
-; PLAIN: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
-; PLAIN: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64)
; OPT: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310)
; OPT: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
; OPT: @c = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
@@ -82,8 +80,6 @@
; OPT: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
; OPT: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64)
; OPT: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64)
-; OPT: @j = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
-; OPT: @k = constant i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64)
; TO: @a = constant i64 18480
; TO: @b = constant i64 8
; TO: @c = constant i64 16
@@ -93,8 +89,6 @@
; TO: @g = constant i64 8
; TO: @h = constant i64 8
; TO: @i = constant i64 8
-; TO: @j = constant i64 8
-; TO: @k = constant i64 8
@a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5))
@b = constant i64 ptrtoint ([13 x double]* getelementptr ({i1, [13 x double]}* null, i64 0, i32 1) to i64)
@@ -105,8 +99,6 @@
@g = constant i64 ptrtoint ({double, double}* getelementptr ({i1, {double, double}}* null, i64 0, i32 1) to i64)
@h = constant i64 ptrtoint (double** getelementptr (double** null, i64 1) to i64)
@i = constant i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64)
-@j = constant i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64)
-@k = constant i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64)
; The target-dependent folder should cast GEP indices to integer-sized pointers.
@@ -275,14 +267,6 @@ define i1* @hoo1() nounwind {
; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
-; PLAIN: define i64 @fj() nounwind {
-; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64
-; PLAIN: ret i64 %t
-; PLAIN: }
-; PLAIN: define i64 @fk() nounwind {
-; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64
-; PLAIN: ret i64 %t
-; PLAIN: }
; OPT: define i64 @fa() nounwind {
; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310)
; OPT: }
@@ -310,12 +294,6 @@ define i1* @hoo1() nounwind {
; OPT: define i64 @fi() nounwind {
; OPT: ret i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64)
; OPT: }
-; OPT: define i64 @fj() nounwind {
-; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
-; OPT: }
-; OPT: define i64 @fk() nounwind {
-; OPT: ret i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64)
-; OPT: }
; TO: define i64 @fa() nounwind {
; TO: ret i64 18480
; TO: }
@@ -343,12 +321,6 @@ define i1* @hoo1() nounwind {
; TO: define i64 @fi() nounwind {
; TO: ret i64 8
; TO: }
-; TO: define i64 @fj() nounwind {
-; TO: ret i64 8
-; TO: }
-; TO: define i64 @fk() nounwind {
-; TO: ret i64 8
-; TO: }
; SCEV: Classifying expressions for: @fa
; SCEV: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64
; SCEV: --> (2310 * sizeof(double))
@@ -376,12 +348,6 @@ define i1* @hoo1() nounwind {
; SCEV: Classifying expressions for: @fi
; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64
; SCEV: --> alignof(i1*)
-; SCEV: Classifying expressions for: @fj
-; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64
-; SCEV: --> alignof(double)
-; SCEV: Classifying expressions for: @fk
-; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64) to i64
-; SCEV: --> sizeof(double)
define i64 @fa() nounwind {
%t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64
@@ -419,14 +385,6 @@ define i64 @fi() nounwind {
%t = bitcast i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) to i64
ret i64 %t
}
-define i64 @fj() nounwind {
- %t = bitcast i64 ptrtoint (union {double, double}* getelementptr ({i1, union {double, double}}* null, i64 0, i32 1) to i64) to i64
- ret i64 %t
-}
-define i64 @fk() nounwind {
- %t = bitcast i64 ptrtoint (union {double, double}* getelementptr (union {double, double}* null, i64 1) to i64) to i64
- ret i64 %t
-}
; PLAIN: define i64* @fM() nounwind {
; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
diff --git a/test/Other/inline-asm-newline-terminator.ll b/test/Other/inline-asm-newline-terminator.ll
index f6cc5c1fb421..af93cc0dd2aa 100644
--- a/test/Other/inline-asm-newline-terminator.ll
+++ b/test/Other/inline-asm-newline-terminator.ll
@@ -1,5 +1,4 @@
; RUN: llc -filetype=obj -o - < %s
-; XFAIL: vg_leak
; ModuleID = 't.c'
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index dee3d11d2fb5..fcef7ee2d571 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -161,5 +161,7 @@ declare i32 @nonstruct_callee() nounwind
define void @struct_caller() nounwind {
entry:
call %struct bitcast (i32 ()* @foo to %struct ()*)()
- ret void
+
+ ; CHECK: Undefined behavior: indirectbr with no destinations
+ indirectbr i8* null, []
}
diff --git a/test/Scripts/coff-dump.py b/test/Scripts/coff-dump.py
new file mode 100755
index 000000000000..0af3d368d5d0
--- /dev/null
+++ b/test/Scripts/coff-dump.py
@@ -0,0 +1,566 @@
+#!/usr/bin/env python
+#===-- coff-dump.py - COFF object file dump utility-------------------------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+#
+# COFF File Definition
+#
+
+def string_table_entry (offset):
+ return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s'))
+
+def secname(value):
+ if value[0] == '/':
+ return string_table_entry (value [1:].rstrip('\0'))
+ else:
+ return '%s'
+
+def symname(value):
+ parts = struct.unpack("<2L", value)
+ if parts [0] == 0:
+ return string_table_entry (parts [1])
+ else:
+ return '%s'
+
+file = ('struct', [
+ ('MachineType', ('enum', '<H', '0x%X', {
+ 0x0: 'IMAGE_FILE_MACHINE_UNKNOWN',
+ 0x1d3: 'IMAGE_FILE_MACHINE_AM33',
+ 0x8664: 'IMAGE_FILE_MACHINE_AMD64',
+ 0x1c0: 'IMAGE_FILE_MACHINE_ARM',
+ 0xebc: 'IMAGE_FILE_MACHINE_EBC',
+ 0x14c: 'IMAGE_FILE_MACHINE_I386',
+ 0x200: 'IMAGE_FILE_MACHINE_IA64',
+ 0x904: 'IMAGE_FILE_MACHINE_M32R',
+ 0x266: 'IMAGE_FILE_MACHINE_MIPS16',
+ 0x366: 'IMAGE_FILE_MACHINE_MIPSFPU',
+ 0x466: 'IMAGE_FILE_MACHINE_MIPSFPU16',
+ 0x1f0: 'IMAGE_FILE_MACHINE_POWERPC',
+ 0x1f1: 'IMAGE_FILE_MACHINE_POWERPCFP',
+ 0x166: 'IMAGE_FILE_MACHINE_R4000',
+ 0x1a2: 'IMAGE_FILE_MACHINE_SH3',
+ 0x1a3: 'IMAGE_FILE_MACHINE_SH3DSP',
+ 0x1a6: 'IMAGE_FILE_MACHINE_SH4',
+ 0x1a8: 'IMAGE_FILE_MACHINE_SH5',
+ 0x1c2: 'IMAGE_FILE_MACHINE_THUMB',
+ 0x169: 'IMAGE_FILE_MACHINE_WCEMIPSV2',
+ })),
+ ('NumberOfSections', ('scalar', '<H', '%d')),
+ ('TimeDateStamp', ('scalar', '<L', '%d')),
+ ('PointerToSymbolTable', ('scalar', '<L', '0x%0X')),
+ ('NumberOfSymbols', ('scalar', '<L', '%d')),
+ ('SizeOfOptionalHeader', ('scalar', '<H', '%d')),
+ ('Characteristics', ('flags', '<H', '0x%x', [
+ (0x0001, 'IMAGE_FILE_RELOCS_STRIPPED', ),
+ (0x0002, 'IMAGE_FILE_EXECUTABLE_IMAGE', ),
+ (0x0004, 'IMAGE_FILE_LINE_NUMS_STRIPPED', ),
+ (0x0008, 'IMAGE_FILE_LOCAL_SYMS_STRIPPED', ),
+ (0x0010, 'IMAGE_FILE_AGGRESSIVE_WS_TRIM', ),
+ (0x0020, 'IMAGE_FILE_LARGE_ADDRESS_AWARE', ),
+ (0x0080, 'IMAGE_FILE_BYTES_REVERSED_LO', ),
+ (0x0100, 'IMAGE_FILE_32BIT_MACHINE', ),
+ (0x0200, 'IMAGE_FILE_DEBUG_STRIPPED', ),
+ (0x0400, 'IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP', ),
+ (0x0800, 'IMAGE_FILE_NET_RUN_FROM_SWAP', ),
+ (0x1000, 'IMAGE_FILE_SYSTEM', ),
+ (0x2000, 'IMAGE_FILE_DLL', ),
+ (0x4000, 'IMAGE_FILE_UP_SYSTEM_ONLY', ),
+ (0x8000, 'IMAGE_FILE_BYTES_REVERSED_HI', ),
+ ])),
+ ('Sections', ('array', 'NumberOfSections', ('struct', [
+ ('Name', ('scalar', '<8s', secname)),
+ ('VirtualSize', ('scalar', '<L', '%d' )),
+ ('VirtualAddress', ('scalar', '<L', '%d' )),
+ ('SizeOfRawData', ('scalar', '<L', '%d' )),
+ ('PointerToRawData', ('scalar', '<L', '0x%X' )),
+ ('PointerToRelocations', ('scalar', '<L', '0x%X' )),
+ ('PointerToLineNumbers', ('scalar', '<L', '0x%X' )),
+ ('NumberOfRelocations', ('scalar', '<H', '%d' )),
+ ('NumberOfLineNumbers', ('scalar', '<H', '%d' )),
+ ('Charateristics', ('flags', '<L', '0x%X', [
+ (0x00000008, 'IMAGE_SCN_TYPE_NO_PAD'),
+ (0x00000020, 'IMAGE_SCN_CNT_CODE'),
+ (0x00000040, 'IMAGE_SCN_CNT_INITIALIZED_DATA'),
+ (0x00000080, 'IMAGE_SCN_CNT_UNINITIALIZED_DATA'),
+ (0x00000100, 'IMAGE_SCN_LNK_OTHER'),
+ (0x00000200, 'IMAGE_SCN_LNK_INFO'),
+ (0x00000800, 'IMAGE_SCN_LNK_REMOVE'),
+ (0x00001000, 'IMAGE_SCN_LNK_COMDAT'),
+ (0x00008000, 'IMAGE_SCN_GPREL'),
+ (0x00020000, 'IMAGE_SCN_MEM_PURGEABLE'),
+ (0x00020000, 'IMAGE_SCN_MEM_16BIT'),
+ (0x00040000, 'IMAGE_SCN_MEM_LOCKED'),
+ (0x00080000, 'IMAGE_SCN_MEM_PRELOAD'),
+ (0x00F00000, 'IMAGE_SCN_ALIGN', {
+ 0x00100000: 'IMAGE_SCN_ALIGN_1BYTES',
+ 0x00200000: 'IMAGE_SCN_ALIGN_2BYTES',
+ 0x00300000: 'IMAGE_SCN_ALIGN_4BYTES',
+ 0x00400000: 'IMAGE_SCN_ALIGN_8BYTES',
+ 0x00500000: 'IMAGE_SCN_ALIGN_16BYTES',
+ 0x00600000: 'IMAGE_SCN_ALIGN_32BYTES',
+ 0x00700000: 'IMAGE_SCN_ALIGN_64BYTES',
+ 0x00800000: 'IMAGE_SCN_ALIGN_128BYTES',
+ 0x00900000: 'IMAGE_SCN_ALIGN_256BYTES',
+ 0x00A00000: 'IMAGE_SCN_ALIGN_512BYTES',
+ 0x00B00000: 'IMAGE_SCN_ALIGN_1024BYTES',
+ 0x00C00000: 'IMAGE_SCN_ALIGN_2048BYTES',
+ 0x00D00000: 'IMAGE_SCN_ALIGN_4096BYTES',
+ 0x00E00000: 'IMAGE_SCN_ALIGN_8192BYTES',
+ }),
+ (0x01000000, 'IMAGE_SCN_LNK_NRELOC_OVFL'),
+ (0x02000000, 'IMAGE_SCN_MEM_DISCARDABLE'),
+ (0x04000000, 'IMAGE_SCN_MEM_NOT_CACHED'),
+ (0x08000000, 'IMAGE_SCN_MEM_NOT_PAGED'),
+ (0x10000000, 'IMAGE_SCN_MEM_SHARED'),
+ (0x20000000, 'IMAGE_SCN_MEM_EXECUTE'),
+ (0x40000000, 'IMAGE_SCN_MEM_READ'),
+ (0x80000000, 'IMAGE_SCN_MEM_WRITE'),
+ ])),
+ ('SectionData', ('ptr', 'PointerToRawData', ('blob', 'SizeOfRawData'))),
+ ('Relocations', ('ptr', 'PointerToRelocations', ('array', 'NumberOfRelocations', ('struct', [
+ ('VirtualAddress', ('scalar', '<L', '0x%X')),
+ ('SymbolTableIndex', ('scalar', '<L', '%d' )),
+ ('Type', ('enum', '<H', '%d', ('MachineType', {
+ 0x14c: {
+ 0x0000: 'IMAGE_REL_I386_ABSOLUTE',
+ 0x0001: 'IMAGE_REL_I386_DIR16',
+ 0x0002: 'IMAGE_REL_I386_REL16',
+ 0x0006: 'IMAGE_REL_I386_DIR32',
+ 0x0007: 'IMAGE_REL_I386_DIR32NB',
+ 0x0009: 'IMAGE_REL_I386_SEG12',
+ 0x000A: 'IMAGE_REL_I386_SECTION',
+ 0x000B: 'IMAGE_REL_I386_SECREL',
+ 0x000C: 'IMAGE_REL_I386_TOKEN',
+ 0x000D: 'IMAGE_REL_I386_SECREL7',
+ 0x0014: 'IMAGE_REL_I386_REL32',
+ },
+ 0x8664: {
+ 0x0000: 'IMAGE_REL_AMD64_ABSOLUTE',
+ 0x0001: 'IMAGE_REL_AMD64_ADDR64',
+ 0x0002: 'IMAGE_REL_AMD64_ADDR32',
+ 0x0003: 'IMAGE_REL_AMD64_ADDR32NB',
+ 0x0004: 'IMAGE_REL_AMD64_REL32',
+ 0x0005: 'IMAGE_REL_AMD64_REL32_1',
+ 0x0006: 'IMAGE_REL_AMD64_REL32_2',
+ 0x0007: 'IMAGE_REL_AMD64_REL32_3',
+ 0x0008: 'IMAGE_REL_AMD64_REL32_4',
+ 0x0009: 'IMAGE_REL_AMD64_REL32_5',
+ 0x000A: 'IMAGE_REL_AMD64_SECTION',
+ 0x000B: 'IMAGE_REL_AMD64_SECREL',
+ 0x000C: 'IMAGE_REL_AMD64_SECREL7',
+ 0x000D: 'IMAGE_REL_AMD64_TOKEN',
+ 0x000E: 'IMAGE_REL_AMD64_SREL32',
+ 0x000F: 'IMAGE_REL_AMD64_PAIR',
+ 0x0010: 'IMAGE_REL_AMD64_SSPAN32',
+ },
+ }))),
+ ('SymbolName', ('ptr', '+ PointerToSymbolTable * - SymbolTableIndex 1 18', ('scalar', '<8s', symname)))
+ ])))),
+ ]))),
+ ('Symbols', ('ptr', 'PointerToSymbolTable', ('byte-array', '* NumberOfSymbols 18', ('struct', [
+ ('Name', ('scalar', '<8s', symname)),
+ ('Value', ('scalar', '<L', '%d' )),
+ ('SectionNumber', ('scalar', '<H', '%d' )),
+ ('SimpleType', ('enum', '<B', '%d', {
+ 0: 'IMAGE_SYM_TYPE_NULL',
+ 1: 'IMAGE_SYM_TYPE_VOID',
+ 2: 'IMAGE_SYM_TYPE_CHAR',
+ 3: 'IMAGE_SYM_TYPE_SHORT',
+ 4: 'IMAGE_SYM_TYPE_INT',
+ 5: 'IMAGE_SYM_TYPE_LONG',
+ 6: 'IMAGE_SYM_TYPE_FLOAT',
+ 7: 'IMAGE_SYM_TYPE_DOUBLE',
+ 8: 'IMAGE_SYM_TYPE_STRUCT',
+ 9: 'IMAGE_SYM_TYPE_UNION',
+ 10: 'IMAGE_SYM_TYPE_ENUM',
+ 11: 'IMAGE_SYM_TYPE_MOE',
+ 12: 'IMAGE_SYM_TYPE_BYTE',
+ 13: 'IMAGE_SYM_TYPE_WORD',
+ 14: 'IMAGE_SYM_TYPE_UINT',
+ 15: 'IMAGE_SYM_TYPE_DWORD',
+ })),
+ ('ComplexType', ('enum', '<B', '%d', {
+ 0: 'IMAGE_SYM_DTYPE_NULL',
+ 1: 'IMAGE_SYM_DTYPE_POINTER',
+ 2: 'IMAGE_SYM_DTYPE_FUNCTION',
+ 3: 'IMAGE_SYM_DTYPE_ARRAY',
+ })),
+ ('StorageClass', ('enum', '<B', '%d', {
+ -1: 'IMAGE_SYM_CLASS_END_OF_FUNCTION',
+ 0: 'IMAGE_SYM_CLASS_NULL',
+ 1: 'IMAGE_SYM_CLASS_AUTOMATIC',
+ 2: 'IMAGE_SYM_CLASS_EXTERNAL',
+ 3: 'IMAGE_SYM_CLASS_STATIC',
+ 4: 'IMAGE_SYM_CLASS_REGISTER',
+ 5: 'IMAGE_SYM_CLASS_EXTERNAL_DEF',
+ 6: 'IMAGE_SYM_CLASS_LABEL',
+ 7: 'IMAGE_SYM_CLASS_UNDEFINED_LABEL',
+ 8: 'IMAGE_SYM_CLASS_MEMBER_OF_STRUCT',
+ 9: 'IMAGE_SYM_CLASS_ARGUMENT',
+ 10: 'IMAGE_SYM_CLASS_STRUCT_TAG',
+ 11: 'IMAGE_SYM_CLASS_MEMBER_OF_UNION',
+ 12: 'IMAGE_SYM_CLASS_UNION_TAG',
+ 13: 'IMAGE_SYM_CLASS_TYPE_DEFINITION',
+ 14: 'IMAGE_SYM_CLASS_UNDEFINED_STATIC',
+ 15: 'IMAGE_SYM_CLASS_ENUM_TAG',
+ 16: 'IMAGE_SYM_CLASS_MEMBER_OF_ENUM',
+ 17: 'IMAGE_SYM_CLASS_REGISTER_PARAM',
+ 18: 'IMAGE_SYM_CLASS_BIT_FIELD',
+ 100: 'IMAGE_SYM_CLASS_BLOCK',
+ 101: 'IMAGE_SYM_CLASS_FUNCTION',
+ 102: 'IMAGE_SYM_CLASS_END_OF_STRUCT',
+ 103: 'IMAGE_SYM_CLASS_FILE',
+ 104: 'IMAGE_SYM_CLASS_SECTION',
+ 105: 'IMAGE_SYM_CLASS_WEAK_EXTERNAL',
+ 107: 'IMAGE_SYM_CLASS_CLR_TOKEN',
+ })),
+ ('NumberOfAuxSymbols', ('scalar', '<B', '%d' )),
+ ('AuxillaryData', ('blob', '* NumberOfAuxSymbols 18')),
+ ])))),
+])
+
+#
+# Definition Interpreter
+#
+
+import sys, types, struct, re
+
+Input = None
+Stack = []
+Fields = {}
+
+Indent = 0
+NewLine = True
+
+def indent():
+ global Indent
+ Indent += 1
+
+def dedent():
+ global Indent
+ Indent -= 1
+
+def write(input):
+ global NewLine
+ output = ""
+
+ for char in input:
+
+ if NewLine:
+ output += Indent * ' '
+ NewLine = False
+
+ output += char
+
+ if char == '\n':
+ NewLine = True
+
+ sys.stdout.write (output)
+
+def read(format):
+ return struct.unpack (format, Input.read(struct.calcsize(format)))
+
+def read_cstr ():
+ output = ""
+ while True:
+ char = Input.read (1)
+ if len (char) == 0:
+ raise RuntimeError ("EOF while reading cstr")
+ if char == '\0':
+ break
+ output += char
+ return output
+
+def push_pos(seek_to = None):
+ Stack [0:0] = [Input.tell ()]
+ if seek_to:
+ Input.seek (seek_to)
+
+def pop_pos():
+ assert(len (Stack) > 0)
+ Input.seek (Stack [0])
+ del Stack [0]
+
+def print_binary_data(size):
+ value = ""
+ while size > 0:
+ if size >= 16:
+ data = Input.read(16)
+ size -= 16
+ else:
+ data = Input.read(size)
+ size = 0
+ value += data
+ bytes = ""
+ text = ""
+ for index in xrange (16):
+ if index < len (data):
+ if index == 8:
+ bytes += "- "
+ ch = ord (data [index])
+ bytes += "%02X " % ch
+ if ch >= 0x20 and ch <= 0x7F:
+ text += data [index]
+ else:
+ text += "."
+ else:
+ if index == 8:
+ bytes += " "
+ bytes += " "
+
+ write ("%s|%s|\n" % (bytes, text))
+ return value
+
+idlit = re.compile ("[a-zA-Z][a-zA-Z0-9_-]*")
+numlit = re.compile ("[0-9]+")
+
+def read_value(expr):
+
+ input = iter (expr.split ())
+
+ def eval():
+
+ token = input.next ()
+
+ if expr == 'cstr':
+ return read_cstr ()
+ if expr == 'true':
+ return True
+ if expr == 'false':
+ return False
+
+ if len (token) > 1 and token [0] in ('=', '@', '<', '!', '>'):
+ val = read(expr)
+ assert (len (val) == 1)
+ return val [0]
+
+ if token == '+':
+ return eval () + eval ()
+ if token == '-':
+ return eval () - eval ()
+ if token == '*':
+ return eval () * eval ()
+ if token == '/':
+ return eval () / eval ()
+
+ if idlit.match (token):
+ return Fields [token]
+ if numlit.match (token):
+ return int (token)
+
+ raise RuntimeError ("unexpected token %s" % repr(token))
+
+ value = eval ()
+
+ try:
+ input.next ()
+ except StopIteration:
+ return value
+ raise RuntimeError("unexpected input at end of expression")
+
+def write_value(format,value):
+ format_type = type (format)
+ if format_type is types.StringType:
+ write (format%value)
+ elif format_type is types.FunctionType:
+ write_value (format (value), value)
+ elif format_type is types.TupleType:
+ Fields ['this'] = value
+ handle_element (format)
+ else:
+ raise RuntimeError("unexpected type: %s" % repr(format_type))
+
+def handle_scalar(entry):
+ iformat = entry [1]
+ oformat = entry [2]
+
+ value = read_value (iformat)
+
+ write_value (oformat, value)
+
+ return value
+
+def handle_enum(entry):
+ iformat = entry [1]
+ oformat = entry [2]
+ definitions = entry [3]
+
+ value = read_value (iformat)
+
+ if type (definitions) is types.TupleType:
+ selector = read_value (definitions [0])
+ definitions = definitions [1] [selector]
+
+ if value in definitions:
+ description = definitions[value]
+ else:
+ description = "unknown"
+
+ write ("%s (" % description)
+ write_value (oformat, value)
+ write (")")
+
+ return value
+
+def handle_flags(entry):
+ iformat = entry [1]
+ oformat = entry [2]
+ definitions = entry [3]
+
+ value = read_value (iformat)
+
+ write_value (oformat, value)
+
+ indent ()
+ for entry in definitions:
+ mask = entry [0]
+ name = entry [1]
+ if len (entry) == 3:
+ map = entry [2]
+ selection = value & mask
+ if selection in map:
+ write("\n%s" % map[selection])
+ else:
+ write("\n%s <%d>" % (name, selection))
+ elif len (entry) == 2:
+ if value & mask != 0:
+ write("\n%s" % name)
+ dedent ()
+
+ return value
+
+def handle_struct(entry):
+ global Fields
+ members = entry [1]
+
+ newFields = {}
+
+ write ("{\n");
+ indent ()
+
+ for member in members:
+ name = member [0]
+ type = member [1]
+
+ write("%s = "%name.ljust(24))
+
+ value = handle_element(type)
+
+ write("\n")
+
+ Fields [name] = value
+ newFields [name] = value
+
+ dedent ()
+ write ("}")
+
+ return newFields
+
+def handle_array(entry):
+ length = entry [1]
+ element = entry [2]
+
+ newItems = []
+
+ write ("[\n")
+ indent ()
+
+ value = read_value (length)
+
+ for index in xrange (value):
+ write ("%d = "%index)
+ value = handle_element(element)
+ write ("\n")
+ newItems.append (value)
+
+ dedent ()
+ write ("]")
+
+ return newItems
+
+def handle_byte_array(entry):
+ length = entry [1]
+ element = entry [2]
+
+ newItems = []
+
+ write ("[\n")
+ indent ()
+
+ value = read_value (length)
+ end_of_array = Input.tell () + value
+
+ index = 0
+ while Input.tell () < end_of_array:
+ write ("%d = "%index)
+ value = handle_element(element)
+ write ("\n")
+ newItems.append (value)
+ index += 1
+
+ dedent ()
+ write ("]")
+
+ return newItems
+
+def handle_ptr(entry):
+ offset = entry[1]
+ element = entry [2]
+
+ value = None
+ offset = read_value (offset)
+
+ if offset != 0:
+
+ push_pos (offset)
+
+ value = handle_element (element)
+
+ pop_pos ()
+
+ else:
+ write ("None")
+
+ return value
+
+def handle_blob(entry):
+ length = entry [1]
+
+ write ("\n")
+ indent ()
+
+ value = print_binary_data (read_value (length))
+
+ dedent ()
+
+ return value
+
+def handle_element(entry):
+ handlers = {
+ 'struct': handle_struct,
+ 'scalar': handle_scalar,
+ 'enum': handle_enum,
+ 'flags': handle_flags,
+ 'ptr': handle_ptr,
+ 'blob': handle_blob,
+ 'array': handle_array,
+ 'byte-array': handle_byte_array,
+ }
+
+ if not entry [0] in handlers:
+ raise RuntimeError ("unexpected type '%s'" % str (entry[0]))
+
+ return handlers [entry [0]] (entry)
+
+Input = open (sys.argv [1], "rb")
+try:
+ handle_element (file)
+finally:
+ Input.close ()
+ Input = None
diff --git a/test/Scripts/coff-dump.py.bat b/test/Scripts/coff-dump.py.bat
new file mode 100644
index 000000000000..cc83eba1c446
--- /dev/null
+++ b/test/Scripts/coff-dump.py.bat
@@ -0,0 +1,4 @@
+@echo off
+
+%PYTHON_EXECUTABLE% %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9
+
diff --git a/test/TableGen/FieldAccess.td b/test/TableGen/FieldAccess.td
new file mode 100644
index 000000000000..ad652e79ea7c
--- /dev/null
+++ b/test/TableGen/FieldAccess.td
@@ -0,0 +1,14 @@
+// RUN: tblgen %s
+class Bla<string t>
+{
+ string blu = t;
+}
+
+class Bli<Bla t>
+{
+ Bla bla = t;
+}
+
+def a : Bli<Bla<"">>;
+def b : Bla<!cast<Bla>(a.bla).blu>; // works
+def c : Bla<a.bla.blu>; // doesn't work: Cannot access field 'blu' of value 'a.bla'
diff --git a/test/TableGen/ListManip.td b/test/TableGen/ListManip.td
new file mode 100644
index 000000000000..c221bb1335b6
--- /dev/null
+++ b/test/TableGen/ListManip.td
@@ -0,0 +1,10 @@
+// RUN: tblgen %s
+class Bli<string _t>
+{
+ string t = _t;
+}
+
+class Bla<list<Bli> _bli>
+: Bli<!car(_bli).t>
+{
+}
diff --git a/test/TestRunner.sh b/test/TestRunner.sh
index 4f04d81aac64..ab50856af11f 100755
--- a/test/TestRunner.sh
+++ b/test/TestRunner.sh
@@ -1,36 +1,5 @@
#!/bin/sh
-#
-# TestRunner.sh - This script is used to run the deja-gnu tests exactly like
-# deja-gnu does, by executing the Tcl script specified in the test case's
-# RUN: lines. This is made possible by a simple make target supported by the
-# test/Makefile. All this script does is invoke that make target.
-#
-# Usage:
-# TestRunner.sh {script_names}
-#
-# This script is typically used by cd'ing to a test directory and then
-# running TestRunner.sh with a list of test file names you want to run.
-#
-TESTPATH=`pwd`
-SUBDIR=""
-if test `dirname $1` = "." ; then
- while test `basename $TESTPATH` != "test" -a ! -z "$TESTPATH" ; do
- tmp=`basename $TESTPATH`
- SUBDIR="$tmp/$SUBDIR"
- TESTPATH=`dirname $TESTPATH`
- done
-fi
+# Deprecated, use 'llvm-lit'.
-for TESTFILE in "$@" ; do
- if test `dirname $TESTFILE` = . ; then
- if test -d "$TESTPATH" ; then
- cd $TESTPATH
- make check-one TESTONE="$SUBDIR$TESTFILE"
- cd $PWD
- else
- echo "Can't find llvm/test directory in " `pwd`
- fi
- else
- make check-one TESTONE=$TESTFILE
- fi
-done
+echo "warning: '$0' is deprecated, use 'llvm-lit' instead."
+exec llvm-lit "$@"
diff --git a/test/Transforms/ABCD/basic.ll b/test/Transforms/ABCD/basic.ll
deleted file mode 100644
index f2ce1b9aa738..000000000000
--- a/test/Transforms/ABCD/basic.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -abcd -S | FileCheck %s
-
-define void @test() {
-; CHECK: @test
-; CHECK-NOT: br i1 %tmp95
-; CHECK: ret void
-entry:
- br label %bb19
-
-bb:
- br label %bb1
-
-bb1:
- %tmp7 = icmp sgt i32 %tmp94, 1
- br i1 %tmp7, label %bb.i.i, label %return
-
-bb.i.i:
- br label %return
-
-bb19:
- %tmp94 = ashr i32 undef, 3
- %tmp95 = icmp sgt i32 %tmp94, 16
- br i1 %tmp95, label %bb, label %return
-
-return:
- ret void
-}
diff --git a/test/Transforms/ConstProp/constant-expr.ll b/test/Transforms/ConstProp/constant-expr.ll
index 996303293d18..556ed1f652af 100644
--- a/test/Transforms/ConstProp/constant-expr.ll
+++ b/test/Transforms/ConstProp/constant-expr.ll
@@ -16,9 +16,9 @@
@E = global i1 udiv (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
; CHECK: @E = global i1 icmp ult (i8* @X, i8* @Y)
@F = global i1 srem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
-; CHECK: @F = global i1 false ; <i1*> [#uses=0]
+; CHECK: @F = global i1 false
@G = global i1 urem (i1 icmp ult (i8* @X, i8* @Y), i1 icmp ult (i8* @X, i8* @Z))
-; CHECK: @G = global i1 false ; <i1*> [#uses=0]
+; CHECK: @G = global i1 false
@H = global i1 icmp ule (i32* bitcast (i8* @X to i32*), i32* bitcast (i8* @Y to i32*))
; CHECK: @H = global i1 icmp ule (i8* @X, i8* @Y)
diff --git a/test/Transforms/ConstantMerge/dont-merge.ll b/test/Transforms/ConstantMerge/dont-merge.ll
index 877cf8dc6710..e5337dff27df 100644
--- a/test/Transforms/ConstantMerge/dont-merge.ll
+++ b/test/Transforms/ConstantMerge/dont-merge.ll
@@ -28,3 +28,17 @@ define void @test2(i32** %P1, i32 addrspace(30)** %P2) {
store i32 addrspace(30)* @T2b, i32 addrspace(30)** %P2
ret void
}
+
+; PR8144 - Don't merge globals marked attribute(used)
+; CHECK: @T3A =
+; CHECK: @T3B =
+
+@T3A = internal constant i32 0
+@T3B = internal constant i32 0
+@llvm.used = appending global [2 x i32*] [i32* @T3A, i32* @T3B], section
+"llvm.metadata"
+
+define void @test3() {
+ call void asm sideeffect "T3A, T3B",""() ; invisible use of T3A and T3B
+ ret void
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll
new file mode 100644
index 000000000000..fef5b8579eb5
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S < %s -correlated-propagation | FileCheck %s
+
+; CHECK: @test
+define i16 @test(i32 %a, i1 %b) {
+entry:
+ %c = icmp eq i32 %a, 0
+ br i1 %c, label %left, label %right
+
+right:
+ %d = trunc i32 %a to i1
+ br label %merge
+
+left:
+ br i1 %b, label %merge, label %other
+
+other:
+ ret i16 23
+
+merge:
+ %f = phi i1 [%b, %left], [%d, %right]
+; CHECK: select i1 %f, i16 1, i16 0
+ %h = select i1 %f, i16 1, i16 0
+; CHECK: ret i16 %h
+ ret i16 %h
+} \ No newline at end of file
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
new file mode 100644
index 000000000000..24666e901e9e
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+; PR2581
+
+; CHECK: @test1
+define i32 @test1(i1 %C) nounwind {
+ br i1 %C, label %exit, label %body
+
+body: ; preds = %0
+; CHECK-NOT: select
+ %A = select i1 %C, i32 10, i32 11 ; <i32> [#uses=1]
+; CHECK: ret i32 11
+ ret i32 %A
+
+exit: ; preds = %0
+; CHECK: ret i32 10
+ ret i32 10
+}
+
+; PR4420
+declare i1 @ext()
+; CHECK: @test2
+define i1 @test2() {
+entry:
+ %cond = tail call i1 @ext() ; <i1> [#uses=2]
+ br i1 %cond, label %bb1, label %bb2
+
+bb1: ; preds = %entry
+ %cond2 = tail call i1 @ext() ; <i1> [#uses=1]
+ br i1 %cond2, label %bb3, label %bb2
+
+bb2: ; preds = %bb1, %entry
+; CHECK-NOT: phi i1
+ %cond_merge = phi i1 [ %cond, %entry ], [ false, %bb1 ] ; <i1> [#uses=1]
+; CHECK: ret i1 false
+ ret i1 %cond_merge
+
+bb3: ; preds = %bb1
+ %res = tail call i1 @ext() ; <i1> [#uses=1]
+; CHECK: ret i1 %res
+ ret i1 %res
+}
+
+; PR4855
+@gv = internal constant i8 7
+; CHECK: @test3
+define i8 @test3(i8* %a) nounwind {
+entry:
+ %cond = icmp eq i8* %a, @gv
+ br i1 %cond, label %bb2, label %bb
+
+bb: ; preds = %entry
+ ret i8 0
+
+bb2: ; preds = %entry
+; CHECK-NOT: load i8* %a
+ %should_be_const = load i8* %a
+; CHECK: ret i8 7
+ ret i8 %should_be_const
+}
+
+; PR1757
+; CHECK: @test4
+define i32 @test4(i32) {
+EntryBlock:
+; CHECK: icmp sgt i32 %0, 2
+ %.demorgan = icmp sgt i32 %0, 2
+ br i1 %.demorgan, label %GreaterThanTwo, label %LessThanOrEqualToTwo
+
+GreaterThanTwo:
+; CHECK-NOT: icmp eq i32 %0, 2
+ icmp eq i32 %0, 2
+; CHECK: br i1 false
+ br i1 %1, label %Impossible, label %NotTwoAndGreaterThanTwo
+
+NotTwoAndGreaterThanTwo:
+ ret i32 2
+
+Impossible:
+ ret i32 1
+
+LessThanOrEqualToTwo:
+ ret i32 0
+} \ No newline at end of file
diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp
new file mode 100644
index 000000000000..de42dad163fd
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
index 641e920006b2..f079108b9bda 100644
--- a/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
+++ b/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -gvn | llvm-dis
; PR4256
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-linux-gnu"
+target triple = "i386-pc-linux-gnu"
%llvm.dbg.anchor.type = type { i32, i32 }
%struct.cset = type { i8*, i8, i8, i32, i8* }
%struct.lmat = type { %struct.re_guts*, i32, %llvm.dbg.anchor.type*, i8*, i8*, i8*, i8*, i8**, i32, i8*, i8*, i8*, i8*, i8* }
diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
index 5e64f807f6ea..390e77a8cea8 100644
--- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
+++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
@@ -4,7 +4,7 @@
; RUN: opt < %s -globalopt -S > %t
; Check that the new global values still have their address space
-; RUN: cat %t | grep global.*addrspace
+; RUN: cat %t | grep addrspace.*global
@struct = internal addrspace(1) global { i32, i32 } zeroinitializer
@array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer
diff --git a/test/Transforms/GlobalOpt/crash.ll b/test/Transforms/GlobalOpt/crash.ll
index 701472c059a8..bb1fc84f46f9 100644
--- a/test/Transforms/GlobalOpt/crash.ll
+++ b/test/Transforms/GlobalOpt/crash.ll
@@ -40,3 +40,18 @@ xx:
}
declare noalias i8* @malloc(i64) nounwind
+
+
+; PR8063
+@permute_bitrev.bitrev = internal global i32* null, align 8
+define void @permute_bitrev() nounwind {
+entry:
+ %tmp = load i32** @permute_bitrev.bitrev, align 8
+ %conv = sext i32 0 to i64
+ %mul = mul i64 %conv, 4
+ %call = call i8* @malloc(i64 %mul)
+ %0 = bitcast i8* %call to i32*
+ store i32* %0, i32** @permute_bitrev.bitrev, align 8
+ ret void
+}
+
diff --git a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
index f4bab353cd07..bd174a8be3ff 100644
--- a/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -21,10 +21,10 @@ define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly {
entry:
%0 = getelementptr %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1]
%1 = load i32* %0
-; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1) ; <i32> [#uses=1]
+; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1)
%2 = getelementptr %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1]
%3 = load i8* %2
-; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0) ; <i8> [#uses=1]
+; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0)
%4 = zext i8 %3 to i32
%5 = add i32 %4, %1
ret i32 %5
diff --git a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll b/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll
deleted file mode 100644
index c8f97e39bef6..000000000000
--- a/test/Transforms/IndVarSimplify/2003-12-10-IndVarDeadCode.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; The induction variable canonicalization pass shouldn't leave dead
-; instructions laying around!
-;
-; RUN: opt < %s -indvars -S | \
-; RUN: not grep {#uses=0}
-
-define i32 @mul(i32 %x, i32 %y) {
-entry:
- br label %tailrecurse
-
-tailrecurse: ; preds = %endif, %entry
- %accumulator.tr = phi i32 [ %x, %entry ], [ %tmp.9, %endif ] ; <i32> [#uses=2]
- %y.tr = phi i32 [ %y, %entry ], [ %tmp.8, %endif ] ; <i32> [#uses=2]
- %tmp.1 = icmp eq i32 %y.tr, 0 ; <i1> [#uses=1]
- br i1 %tmp.1, label %return, label %endif
-
-endif: ; preds = %tailrecurse
- %tmp.8 = add i32 %y.tr, -1 ; <i32> [#uses=1]
- %tmp.9 = add i32 %accumulator.tr, %x ; <i32> [#uses=1]
- br label %tailrecurse
-
-return: ; preds = %tailrecurse
- ret i32 %accumulator.tr
-}
-
diff --git a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll
index d73eee812b30..d211e3b824b2 100644
--- a/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll
+++ b/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -indvars
; PR4258
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-linux-gnu"
+target triple = "i386-pc-linux-gnu"
define void @0(i32*, i32*, i32, i32) nounwind {
br i1 false, label %bb.nph1.preheader, label %.outer._crit_edge
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index ab438334c660..516fd8084d9e 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -1,4 +1,5 @@
; RUN: opt -indvars %s -disable-output
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
declare i32 @putchar(i8) nounwind
@@ -17,3 +18,38 @@ define void @t2(i1* %P) nounwind {
; <label>:6 ; preds = %1
ret void
}
+
+; PR7562
+define void @fannkuch() nounwind {
+entry: ; preds = %entry
+ br label %bb12
+
+bb12: ; preds = %bb29, %entry
+ %i.1 = phi i32 [ undef, %entry ], [ %i.0, %bb29 ] ; <i32> [#uses=2]
+ %r.1 = phi i32 [ undef, %entry ], [ %r.0, %bb29 ] ; <i32> [#uses=2]
+ br i1 undef, label %bb13, label %bb24
+
+bb13: ; preds = %bb12
+ br label %bb24
+
+bb24: ; preds = %bb30, %bb13, %bb12
+ %i.2 = phi i32 [ %i.1, %bb13 ], [ %i.0, %bb30 ], [ %i.1, %bb12 ] ; <i32> [#uses=1]
+ %r.0 = phi i32 [ %r.1, %bb13 ], [ %2, %bb30 ], [ %r.1, %bb12 ] ; <i32> [#uses=3]
+ br label %bb28
+
+bb27: ; preds = %bb28
+ %0 = add nsw i32 %i.0, 1 ; <i32> [#uses=1]
+ br label %bb28
+
+bb28: ; preds = %bb27, %bb26
+ %i.0 = phi i32 [ %i.2, %bb24 ], [ %0, %bb27 ] ; <i32> [#uses=4]
+ %1 = icmp slt i32 %i.0, %r.0 ; <i1> [#uses=1]
+ br i1 %1, label %bb27, label %bb29
+
+bb29: ; preds = %bb28
+ br i1 undef, label %bb12, label %bb30
+
+bb30: ; preds = %bb29
+ %2 = add nsw i32 %r.0, 1 ; <i32> [#uses=1]
+ br label %bb24
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
index 4ec4acadb4a5..269478a5ed03 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
@@ -5,7 +5,7 @@
; exit is taken. Indvars should correctly compute the exit values.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-linux-gnu"
+target triple = "x86_64-pc-linux-gnu"
%struct..0anon = type <{ i8, [3 x i8] }>
define i32 @main() nounwind {
diff --git a/test/Transforms/IndVarSimplify/uglygep.ll b/test/Transforms/IndVarSimplify/uglygep.ll
new file mode 100644
index 000000000000..0014b683db4b
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/uglygep.ll
@@ -0,0 +1,40 @@
+; RUN: opt -indvars -S < %s | not grep uglygep
+; rdar://8197217
+
+; Indvars should be able to emit a clean GEP here, not an uglygep.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0"
+
+@numf2s = external global i32 ; <i32*> [#uses=1]
+@numf1s = external global i32 ; <i32*> [#uses=1]
+@tds = external global double** ; <double***> [#uses=1]
+
+define void @init_td(i32 %tmp7) nounwind {
+entry:
+ br label %bb4
+
+bb4: ; preds = %bb3, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %tmp9, %bb3 ] ; <i32> [#uses=3]
+ br label %bb
+
+bb: ; preds = %bb4
+ br label %bb2
+
+bb2: ; preds = %bb1, %bb
+ %j.0 = phi i32 [ 0, %bb ], [ %tmp6, %bb1 ] ; <i32> [#uses=3]
+ %tmp8 = icmp slt i32 %j.0, %tmp7 ; <i1> [#uses=1]
+ br i1 %tmp8, label %bb1, label %bb3
+
+bb1: ; preds = %bb2
+ %tmp = load double*** @tds, align 8 ; <double**> [#uses=1]
+ %tmp1 = sext i32 %i.0 to i64 ; <i64> [#uses=1]
+ %tmp2 = getelementptr inbounds double** %tmp, i64 %tmp1 ; <double**> [#uses=1]
+ %tmp3 = load double** %tmp2, align 1 ; <double*> [#uses=1]
+ %tmp6 = add nsw i32 %j.0, 1 ; <i32> [#uses=1]
+ br label %bb2
+
+bb3: ; preds = %bb2
+ %tmp9 = add nsw i32 %i.0, 1 ; <i32> [#uses=1]
+ br label %bb4
+}
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index d8ad5a9864e2..27916b986030 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -1,10 +1,13 @@
-; RUN: opt < %s -instcombine -S | grep {align 16} | count 1
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Instcombine should be able to prove vector alignment in the
; presence of a few mild address computation tricks.
-define void @foo(i8* %b, i64 %n, i64 %u, i64 %y) nounwind {
+; CHECK: @test0(
+; CHECK: align 16
+
+define void @test0(i8* %b, i64 %n, i64 %u, i64 %y) nounwind {
entry:
%c = ptrtoint i8* %b to i64
%d = and i64 %c, -16
@@ -29,3 +32,29 @@ return:
ret void
}
+; When we see a unaligned load from an insufficiently aligned global or
+; alloca, increase the alignment of the load, turning it into an aligned load.
+
+; CHECK: @test1(
+; CHECK: tmp = load
+; CHECK: GLOBAL{{.*}}align 16
+
+@GLOBAL = internal global [4 x i32] zeroinitializer
+
+define <16 x i8> @test1(<2 x i64> %x) {
+entry:
+ %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
+ ret <16 x i8> %tmp
+}
+
+; When a load or store lacks an explicit alignment, add one.
+
+; CHECK: @test2(
+; CHECK: load double* %p, align 8
+; CHECK: store double %n, double* %p, align 8
+
+define double @test2(double* %p, double %n) nounwind {
+ %t = load double* %p
+ store double %n, double* %p
+ ret double %t
+}
diff --git a/test/Transforms/InstCombine/align-inc.ll b/test/Transforms/InstCombine/align-inc.ll
deleted file mode 100644
index 71512b3a1494..000000000000
--- a/test/Transforms/InstCombine/align-inc.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep {GLOBAL.*align 16}
-; RUN: opt < %s -instcombine -S | grep {tmp = load}
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-
-@GLOBAL = internal global [4 x i32] zeroinitializer
-
-define <16 x i8> @foo(<2 x i64> %x) {
-entry:
- %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
- ret <16 x i8> %tmp
-}
-
diff --git a/test/Transforms/InstCombine/bit-checks.ll b/test/Transforms/InstCombine/bit-checks.ll
index f97fb45af161..d774c0972def 100644
--- a/test/Transforms/InstCombine/bit-checks.ll
+++ b/test/Transforms/InstCombine/bit-checks.ll
@@ -13,3 +13,14 @@ entry:
%retval.0 = select i1 %or.cond, i32 2, i32 1 ; <i32> [#uses=1]
ret i32 %retval.0
}
+
+define i32 @main2(i32 %argc, i8** nocapture %argv) nounwind readnone ssp {
+entry:
+ %and = and i32 %argc, 1 ; <i32> [#uses=1]
+ %tobool = icmp eq i32 %and, 0 ; <i1> [#uses=1]
+ %and2 = and i32 %argc, 2 ; <i32> [#uses=1]
+ %tobool3 = icmp eq i32 %and2, 0 ; <i1> [#uses=1]
+ %or.cond = or i1 %tobool, %tobool3 ; <i1> [#uses=1]
+ %storemerge = select i1 %or.cond, i32 0, i32 1 ; <i32> [#uses=1]
+ ret i32 %storemerge
+} \ No newline at end of file
diff --git a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll b/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll
deleted file mode 100644
index 4e9dfbb53b49..000000000000
--- a/test/Transforms/InstCombine/bitcast-scalar-to-vector.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep {ret i32 0}
-; PR4487
-
-; Bitcasts between vectors and scalars are valid, despite being ill-advised.
-
-define i32 @test(i64 %a) {
-bb20:
- %t1 = bitcast i64 %a to <2 x i32>
- %t2 = bitcast i64 %a to <2 x i32>
- %t3 = xor <2 x i32> %t1, %t2
- %t4 = extractelement <2 x i32> %t3, i32 0
- ret i32 %t4
-}
-
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
new file mode 100644
index 000000000000..0718b8a3aee0
--- /dev/null
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -0,0 +1,105 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Bitcasts between vectors and scalars are valid.
+; PR4487
+define i32 @test1(i64 %a) {
+ %t1 = bitcast i64 %a to <2 x i32>
+ %t2 = bitcast i64 %a to <2 x i32>
+ %t3 = xor <2 x i32> %t1, %t2
+ %t4 = extractelement <2 x i32> %t3, i32 0
+ ret i32 %t4
+
+; CHECK: @test1
+; CHECK: ret i32 0
+}
+
+; Optimize bitcasts that are extracting low element of vector. This happens
+; because of SRoA.
+; rdar://7892780
+define float @test2(<2 x float> %A, <2 x i32> %B) {
+ %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2]
+ %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1]
+ %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1]
+
+ %tmp = bitcast <2 x i32> %B to i64
+ %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1]
+ %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1]
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+
+; CHECK: @test2
+; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0
+; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float>
+; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0
+; CHECK-NEXT: %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT: ret float %add
+}
+
+; Optimize bitcasts that are extracting other elements of a vector. This
+; happens because of SRoA.
+; rdar://7892780
+define float @test3(<2 x float> %A, <2 x i64> %B) {
+ %tmp28 = bitcast <2 x float> %A to i64
+ %tmp29 = lshr i64 %tmp28, 32
+ %tmp23 = trunc i64 %tmp29 to i32
+ %tmp24 = bitcast i32 %tmp23 to float
+
+ %tmp = bitcast <2 x i64> %B to i128
+ %tmp1 = lshr i128 %tmp, 64
+ %tmp2 = trunc i128 %tmp1 to i32
+ %tmp4 = bitcast i32 %tmp2 to float
+
+ %add = fadd float %tmp24, %tmp4
+ ret float %add
+
+; CHECK: @test3
+; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1
+; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float>
+; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 2
+; CHECK-NEXT: %add = fadd float %tmp24, %tmp4
+; CHECK-NEXT: ret float %add
+}
+
+
+define <2 x i32> @test4(i32 %A, i32 %B){
+ %tmp38 = zext i32 %A to i64
+ %tmp32 = zext i32 %B to i64
+ %tmp33 = shl i64 %tmp32, 32
+ %ins35 = or i64 %tmp33, %tmp38
+ %tmp43 = bitcast i64 %ins35 to <2 x i32>
+ ret <2 x i32> %tmp43
+ ; CHECK: @test4
+ ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %A, i32 0
+ ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %B, i32 1
+ ; CHECK-NEXT: ret <2 x i32>
+
+}
+
+; rdar://8360454
+define <2 x float> @test5(float %A, float %B) {
+ %tmp37 = bitcast float %A to i32
+ %tmp38 = zext i32 %tmp37 to i64
+ %tmp31 = bitcast float %B to i32
+ %tmp32 = zext i32 %tmp31 to i64
+ %tmp33 = shl i64 %tmp32, 32
+ %ins35 = or i64 %tmp33, %tmp38
+ %tmp43 = bitcast i64 %ins35 to <2 x float>
+ ret <2 x float> %tmp43
+ ; CHECK: @test5
+ ; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
+ ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %B, i32 1
+ ; CHECK-NEXT: ret <2 x float>
+}
+
+define <2 x float> @test6(float %A){
+ %tmp23 = bitcast float %A to i32 ; <i32> [#uses=1]
+ %tmp24 = zext i32 %tmp23 to i64 ; <i64> [#uses=1]
+ %tmp25 = shl i64 %tmp24, 32 ; <i64> [#uses=1]
+ %mask20 = or i64 %tmp25, 1109917696 ; <i64> [#uses=1]
+ %tmp35 = bitcast i64 %mask20 to <2 x float> ; <<2 x float>> [#uses=1]
+ ret <2 x float> %tmp35
+; CHECK: @test6
+; CHECK-NEXT: insertelement <2 x float> <float 4.200000e+01, float undef>, float %A, i32 1
+; CHECK: ret
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 08dcfa731a94..d672d8c1535e 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -10,16 +10,16 @@ declare i32 @llvm.ctlz.i32(i32) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i8 @llvm.ctlz.i8(i8) nounwind readnone
-define i8 @test1(i8 %A, i8 %B) {
+define i8 @uaddtest1(i8 %A, i8 %B) {
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
%y = extractvalue %overflow.result %x, 0
ret i8 %y
-; CHECK: @test1
+; CHECK: @uaddtest1
; CHECK-NEXT: %y = add i8 %A, %B
; CHECK-NEXT: ret i8 %y
}
-define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) {
+define i8 @uaddtest2(i8 %A, i8 %B, i1* %overflowPtr) {
%and.A = and i8 %A, 127
%and.B = and i8 %B, 127
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %and.A, i8 %and.B)
@@ -27,7 +27,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) {
%z = extractvalue %overflow.result %x, 1
store i1 %z, i1* %overflowPtr
ret i8 %y
-; CHECK: @test2
+; CHECK: @uaddtest2
; CHECK-NEXT: %and.A = and i8 %A, 127
; CHECK-NEXT: %and.B = and i8 %B, 127
; CHECK-NEXT: %1 = add nuw i8 %and.A, %and.B
@@ -35,7 +35,7 @@ define i8 @test2(i8 %A, i8 %B, i1* %overflowPtr) {
; CHECK-NEXT: ret i8 %1
}
-define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) {
+define i8 @uaddtest3(i8 %A, i8 %B, i1* %overflowPtr) {
%or.A = or i8 %A, -128
%or.B = or i8 %B, -128
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %or.A, i8 %or.B)
@@ -43,7 +43,7 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) {
%z = extractvalue %overflow.result %x, 1
store i1 %z, i1* %overflowPtr
ret i8 %y
-; CHECK: @test3
+; CHECK: @uaddtest3
; CHECK-NEXT: %or.A = or i8 %A, -128
; CHECK-NEXT: %or.B = or i8 %B, -128
; CHECK-NEXT: %1 = add i8 %or.A, %or.B
@@ -51,34 +51,44 @@ define i8 @test3(i8 %A, i8 %B, i1* %overflowPtr) {
; CHECK-NEXT: ret i8 %1
}
-define i8 @test4(i8 %A, i1* %overflowPtr) {
+define i8 @uaddtest4(i8 %A, i1* %overflowPtr) {
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 undef, i8 %A)
%y = extractvalue %overflow.result %x, 0
%z = extractvalue %overflow.result %x, 1
store i1 %z, i1* %overflowPtr
ret i8 %y
-; CHECK: @test4
+; CHECK: @uaddtest4
; CHECK-NEXT: ret i8 undef
}
-define i8 @test5(i8 %A, i1* %overflowPtr) {
+define i8 @uaddtest5(i8 %A, i1* %overflowPtr) {
+ %x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 0, i8 %A)
+ %y = extractvalue %overflow.result %x, 0
+ %z = extractvalue %overflow.result %x, 1
+ store i1 %z, i1* %overflowPtr
+ ret i8 %y
+; CHECK: @uaddtest5
+; CHECK: ret i8 %A
+}
+
+define i8 @umultest1(i8 %A, i1* %overflowPtr) {
%x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
%y = extractvalue %overflow.result %x, 0
%z = extractvalue %overflow.result %x, 1
store i1 %z, i1* %overflowPtr
ret i8 %y
-; CHECK: @test5
+; CHECK: @umultest1
; CHECK-NEXT: store i1 false, i1* %overflowPtr
; CHECK-NEXT: ret i8 0
}
-define i8 @test6(i8 %A, i1* %overflowPtr) {
+define i8 @umultest2(i8 %A, i1* %overflowPtr) {
%x = call %overflow.result @llvm.umul.with.overflow.i8(i8 1, i8 %A)
%y = extractvalue %overflow.result %x, 0
%z = extractvalue %overflow.result %x, 1
store i1 %z, i1* %overflowPtr
ret i8 %y
-; CHECK: @test6
+; CHECK: @umultest2
; CHECK-NEXT: store i1 false, i1* %overflowPtr
; CHECK-NEXT: ret i8 %A
}
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index fc321e968224..c6c3f2ff6a68 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -402,3 +402,24 @@ if.else: ; preds = %entry
store i32 %tmp5, i32* %res
br label %if.end
}
+
+; PR4413
+declare i32 @ext()
+; CHECK: @test17
+define i32 @test17(i1 %a) {
+entry:
+ br i1 %a, label %bb1, label %bb2
+
+bb1: ; preds = %entry
+ %0 = tail call i32 @ext() ; <i32> [#uses=1]
+ br label %bb2
+
+bb2: ; preds = %bb1, %entry
+ %cond = phi i1 [ true, %bb1 ], [ false, %entry ] ; <i1> [#uses=1]
+; CHECK-NOT: %val = phi i32 [ %0, %bb1 ], [ 0, %entry ]
+ %val = phi i32 [ %0, %bb1 ], [ 0, %entry ] ; <i32> [#uses=1]
+ %res = select i1 %cond, i32 %val, i32 0 ; <i32> [#uses=1]
+; CHECK: ret i32 %cond
+ ret i32 %res
+}
+
diff --git a/test/Transforms/InstCombine/shift-simplify.ll b/test/Transforms/InstCombine/shift-simplify.ll
deleted file mode 100644
index e5cc705350f9..000000000000
--- a/test/Transforms/InstCombine/shift-simplify.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: egrep {shl|lshr|ashr} | count 3
-
-define i32 @test0(i32 %A, i32 %B, i32 %C) {
- %X = shl i32 %A, %C
- %Y = shl i32 %B, %C
- %Z = and i32 %X, %Y
- ret i32 %Z
-}
-
-define i32 @test1(i32 %A, i32 %B, i32 %C) {
- %X = lshr i32 %A, %C
- %Y = lshr i32 %B, %C
- %Z = or i32 %X, %Y
- ret i32 %Z
-}
-
-define i32 @test2(i32 %A, i32 %B, i32 %C) {
- %X = ashr i32 %A, %C
- %Y = ashr i32 %B, %C
- %Z = xor i32 %X, %Y
- ret i32 %Z
-}
-
-define i1 @test3(i32 %X) {
- %tmp1 = shl i32 %X, 7
- %tmp2 = icmp slt i32 %tmp1, 0
- ret i1 %tmp2
-}
-
-define i1 @test4(i32 %X) {
- %tmp1 = lshr i32 %X, 7
- %tmp2 = icmp slt i32 %tmp1, 0
- ret i1 %tmp2
-}
-
-define i1 @test5(i32 %X) {
- %tmp1 = ashr i32 %X, 7
- %tmp2 = icmp slt i32 %tmp1, 0
- ret i1 %tmp2
-}
-
diff --git a/test/Transforms/InstCombine/shift-trunc-shift.ll b/test/Transforms/InstCombine/shift-trunc-shift.ll
deleted file mode 100644
index 7133d299a2bd..000000000000
--- a/test/Transforms/InstCombine/shift-trunc-shift.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep lshr.*63
-
-define i32 @t1(i64 %d18) {
-entry:
- %tmp916 = lshr i64 %d18, 32 ; <i64> [#uses=1]
- %tmp917 = trunc i64 %tmp916 to i32 ; <i32> [#uses=1]
- %tmp10 = lshr i32 %tmp917, 31 ; <i32> [#uses=1]
- ret i32 %tmp10
-}
-
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index feed37bd10ab..871e9fe070e7 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -130,8 +130,8 @@ define i8 @test13(i8 %A) {
;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
define i32 @test14(i32 %A) {
; CHECK: @test14
-; CHECK-NEXT: or i32 %A, 19744
-; CHECK-NEXT: and i32
+; CHECK-NEXT: %B = and i32 %A, -19760
+; CHECK-NEXT: or i32 %B, 19744
; CHECK-NEXT: ret i32
%B = lshr i32 %A, 4 ; <i32> [#uses=1]
%C = or i32 %B, 1234 ; <i32> [#uses=1]
@@ -343,3 +343,101 @@ bb2:
}
+define i32 @test29(i64 %d18) {
+entry:
+ %tmp916 = lshr i64 %d18, 32
+ %tmp917 = trunc i64 %tmp916 to i32
+ %tmp10 = lshr i32 %tmp917, 31
+ ret i32 %tmp10
+; CHECK: @test29
+; CHECK: %tmp916 = lshr i64 %d18, 63
+; CHECK: %tmp10 = trunc i64 %tmp916 to i32
+}
+
+
+define i32 @test30(i32 %A, i32 %B, i32 %C) {
+ %X = shl i32 %A, %C
+ %Y = shl i32 %B, %C
+ %Z = and i32 %X, %Y
+ ret i32 %Z
+; CHECK: @test30
+; CHECK: %X1 = and i32 %A, %B
+; CHECK: %Z = shl i32 %X1, %C
+}
+
+define i32 @test31(i32 %A, i32 %B, i32 %C) {
+ %X = lshr i32 %A, %C
+ %Y = lshr i32 %B, %C
+ %Z = or i32 %X, %Y
+ ret i32 %Z
+; CHECK: @test31
+; CHECK: %X1 = or i32 %A, %B
+; CHECK: %Z = lshr i32 %X1, %C
+}
+
+define i32 @test32(i32 %A, i32 %B, i32 %C) {
+ %X = ashr i32 %A, %C
+ %Y = ashr i32 %B, %C
+ %Z = xor i32 %X, %Y
+ ret i32 %Z
+; CHECK: @test32
+; CHECK: %X1 = xor i32 %A, %B
+; CHECK: %Z = ashr i32 %X1, %C
+; CHECK: ret i32 %Z
+}
+
+define i1 @test33(i32 %X) {
+ %tmp1 = shl i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
+; CHECK: @test33
+; CHECK: %tmp1.mask = and i32 %X, 16777216
+; CHECK: %tmp2 = icmp ne i32 %tmp1.mask, 0
+}
+
+define i1 @test34(i32 %X) {
+ %tmp1 = lshr i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
+; CHECK: @test34
+; CHECK: ret i1 false
+}
+
+define i1 @test35(i32 %X) {
+ %tmp1 = ashr i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
+; CHECK: @test35
+; CHECK: %tmp2 = icmp slt i32 %X, 0
+; CHECK: ret i1 %tmp2
+}
+
+define i128 @test36(i128 %A, i128 %B) {
+entry:
+ %tmp27 = shl i128 %A, 64
+ %tmp23 = shl i128 %B, 64
+ %ins = or i128 %tmp23, %tmp27
+ %tmp45 = lshr i128 %ins, 64
+ ret i128 %tmp45
+
+; CHECK: @test36
+; CHECK: %tmp231 = or i128 %B, %A
+; CHECK: %ins = and i128 %tmp231, 18446744073709551615
+; CHECK: ret i128 %ins
+}
+
+define i64 @test37(i128 %A, i32 %B) {
+entry:
+ %tmp27 = shl i128 %A, 64
+ %tmp22 = zext i32 %B to i128
+ %tmp23 = shl i128 %tmp22, 96
+ %ins = or i128 %tmp23, %tmp27
+ %tmp45 = lshr i128 %ins, 64
+ %tmp46 = trunc i128 %tmp45 to i64
+ ret i64 %tmp46
+
+; CHECK: @test37
+; CHECK: %tmp23 = shl i128 %tmp22, 32
+; CHECK: %ins = or i128 %tmp23, %A
+; CHECK: %tmp46 = trunc i128 %ins to i64
+}
diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll
new file mode 100644
index 000000000000..69e511bfb3bd
--- /dev/null
+++ b/test/Transforms/InstCombine/sqrt.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+define float @test1(float %x) nounwind readnone ssp {
+entry:
+; CHECK: @test1
+; CHECK-NOT: fpext
+; CHECK-NOT: sqrt(
+; CHECK: sqrtf(
+; CHECK-NOT: fptrunc
+ %conv = fpext float %x to double ; <double> [#uses=1]
+ %call = tail call double @sqrt(double %conv) readnone nounwind ; <double> [#uses=1]
+ %conv1 = fptrunc double %call to float ; <float> [#uses=1]
+; CHECK: ret float
+ ret float %conv1
+}
+
+declare double @sqrt(double)
+
+; PR8096
+define float @test2(float %x) nounwind readnone ssp {
+entry:
+; CHECK: @test2
+; CHECK-NOT: fpext
+; CHECK-NOT: sqrt(
+; CHECK: sqrtf(
+; CHECK-NOT: fptrunc
+ %conv = fpext float %x to double ; <double> [#uses=1]
+ %call = tail call double @sqrt(double %conv) nounwind ; <double> [#uses=1]
+ %conv1 = fptrunc double %call to float ; <float> [#uses=1]
+; CHECK: ret float
+ ret float %conv1
+}
diff --git a/test/Transforms/InstCombine/trunc-mask-ext.ll b/test/Transforms/InstCombine/trunc-mask-ext.ll
deleted file mode 100644
index 93e3753cf502..000000000000
--- a/test/Transforms/InstCombine/trunc-mask-ext.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt < %s -instcombine -S > %t
-; RUN: not grep zext %t
-; RUN: not grep sext %t
-
-; Instcombine should be able to eliminate all of these ext casts.
-
-declare void @use(i32)
-
-define i64 @foo(i64 %a) {
- %b = trunc i64 %a to i32
- %c = and i32 %b, 15
- %d = zext i32 %c to i64
- call void @use(i32 %b)
- ret i64 %d
-}
-define i64 @bar(i64 %a) {
- %b = trunc i64 %a to i32
- %c = shl i32 %b, 4
- %q = ashr i32 %c, 4
- %d = sext i32 %q to i64
- call void @use(i32 %b)
- ret i64 %d
-}
-define i64 @goo(i64 %a) {
- %b = trunc i64 %a to i32
- %c = and i32 %b, 8
- %d = zext i32 %c to i64
- call void @use(i32 %b)
- ret i64 %d
-}
-define i64 @hoo(i64 %a) {
- %b = trunc i64 %a to i32
- %c = and i32 %b, 8
- %x = xor i32 %c, 8
- %d = zext i32 %x to i64
- call void @use(i32 %b)
- ret i64 %d
-}
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
new file mode 100644
index 000000000000..f98bfd9236cd
--- /dev/null
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -0,0 +1,99 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; Instcombine should be able to eliminate all of these ext casts.
+
+declare void @use(i32)
+
+define i64 @test1(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 15
+ %d = zext i32 %c to i64
+ call void @use(i32 %b)
+ ret i64 %d
+; CHECK: @test1
+; CHECK: %d = and i64 %a, 15
+; CHECK: ret i64 %d
+}
+define i64 @test2(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = shl i32 %b, 4
+ %q = ashr i32 %c, 4
+ %d = sext i32 %q to i64
+ call void @use(i32 %b)
+ ret i64 %d
+; CHECK: @test2
+; CHECK: shl i64 %a, 36
+; CHECK: %d = ashr i64 {{.*}}, 36
+; CHECK: ret i64 %d
+}
+define i64 @test3(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 8
+ %d = zext i32 %c to i64
+ call void @use(i32 %b)
+ ret i64 %d
+; CHECK: @test3
+; CHECK: %d = and i64 %a, 8
+; CHECK: ret i64 %d
+}
+define i64 @test4(i64 %a) {
+ %b = trunc i64 %a to i32
+ %c = and i32 %b, 8
+ %x = xor i32 %c, 8
+ %d = zext i32 %x to i64
+ call void @use(i32 %b)
+ ret i64 %d
+; CHECK: @test4
+; CHECK: = and i64 %a, 8
+; CHECK: %d = xor i64 {{.*}}, 8
+; CHECK: ret i64 %d
+}
+
+define i32 @test5(i32 %A) {
+ %B = zext i32 %A to i128
+ %C = lshr i128 %B, 16
+ %D = trunc i128 %C to i32
+ ret i32 %D
+; CHECK: @test5
+; CHECK: %C = lshr i32 %A, 16
+; CHECK: ret i32 %C
+}
+
+define i32 @test6(i64 %A) {
+ %B = zext i64 %A to i128
+ %C = lshr i128 %B, 32
+ %D = trunc i128 %C to i32
+ ret i32 %D
+; CHECK: @test6
+; CHECK: %C = lshr i64 %A, 32
+; CHECK: %D = trunc i64 %C to i32
+; CHECK: ret i32 %D
+}
+
+define i92 @test7(i64 %A) {
+ %B = zext i64 %A to i128
+ %C = lshr i128 %B, 32
+ %D = trunc i128 %C to i92
+ ret i92 %D
+; CHECK: @test7
+; CHECK: %B = zext i64 %A to i92
+; CHECK: %C = lshr i92 %B, 32
+; CHECK: ret i92 %C
+}
+
+define i64 @test8(i32 %A, i32 %B) {
+ %tmp38 = zext i32 %A to i128
+ %tmp32 = zext i32 %B to i128
+ %tmp33 = shl i128 %tmp32, 32
+ %ins35 = or i128 %tmp33, %tmp38
+ %tmp42 = trunc i128 %ins35 to i64
+ ret i64 %tmp42
+; CHECK: @test8
+; CHECK: %tmp38 = zext i32 %A to i64
+; CHECK: %tmp32 = zext i32 %B to i64
+; CHECK: %tmp33 = shl i64 %tmp32, 32
+; CHECK: %ins35 = or i64 %tmp33, %tmp38
+; CHECK: ret i64 %ins35
+}
+
diff --git a/test/Transforms/InstCombine/urem-simplify-bug.ll b/test/Transforms/InstCombine/urem-simplify-bug.ll
index 7c2b4b01ca66..229f1a85e860 100644
--- a/test/Transforms/InstCombine/urem-simplify-bug.ll
+++ b/test/Transforms/InstCombine/urem-simplify-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5 }
+; RUN: opt < %s -instcombine -S | grep {= or i32 %x, -5}
@.str = internal constant [5 x i8] c"foo\0A\00" ; <[5 x i8]*> [#uses=1]
@.str1 = internal constant [5 x i8] c"bar\0A\00" ; <[5 x i8]*> [#uses=1]
diff --git a/test/Transforms/JumpThreading/2010-08-26-and.ll b/test/Transforms/JumpThreading/2010-08-26-and.ll
new file mode 100644
index 000000000000..17a0aba2faef
--- /dev/null
+++ b/test/Transforms/JumpThreading/2010-08-26-and.ll
@@ -0,0 +1,162 @@
+; RUN: opt -jump-threading -enable-jump-threading-lvi -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%class.StringSwitch = type { i8*, i32, i32, i8 }
+
+@.str = private constant [4 x i8] c"red\00" ; <[4 x i8]*> [#uses=1]
+@.str1 = private constant [7 x i8] c"orange\00" ; <[7 x i8]*> [#uses=1]
+@.str2 = private constant [7 x i8] c"yellow\00" ; <[7 x i8]*> [#uses=1]
+@.str3 = private constant [6 x i8] c"green\00" ; <[6 x i8]*> [#uses=1]
+@.str4 = private constant [5 x i8] c"blue\00" ; <[5 x i8]*> [#uses=1]
+@.str5 = private constant [7 x i8] c"indigo\00" ; <[7 x i8]*> [#uses=1]
+@.str6 = private constant [7 x i8] c"violet\00" ; <[7 x i8]*> [#uses=1]
+@.str7 = private constant [12 x i8] c"Color = %d\0A\00" ; <[12 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
+entry:
+ %cmp142 = icmp sgt i32 %argc, 1 ; <i1> [#uses=1]
+ br i1 %cmp142, label %bb.nph, label %for.end
+
+bb.nph: ; preds = %entry
+ %tmp = add i32 %argc, -2 ; <i32> [#uses=1]
+ %tmp144 = zext i32 %tmp to i64 ; <i64> [#uses=1]
+ %tmp145 = add i64 %tmp144, 1 ; <i64> [#uses=1]
+ br label %land.lhs.true.i
+
+land.lhs.true.i: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %bb.nph
+ %retval.0.i.pre161 = phi i32 [ undef, %bb.nph ], [ %retval.0.i.pre, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i32> [#uses=3]
+ %indvar = phi i64 [ 0, %bb.nph ], [ %tmp146, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i64> [#uses=1]
+ %tmp146 = add i64 %indvar, 1 ; <i64> [#uses=3]
+ %arrayidx = getelementptr i8** %argv, i64 %tmp146 ; <i8**> [#uses=1]
+ %tmp6 = load i8** %arrayidx, align 8 ; <i8*> [#uses=8]
+ %call.i.i = call i64 @strlen(i8* %tmp6) nounwind ; <i64> [#uses=1]
+ %conv.i.i = trunc i64 %call.i.i to i32 ; <i32> [#uses=6]\
+; CHECK: switch i32 %conv.i.i
+; CHECK-NOT: if.then.i40
+; CHECK: }
+ switch i32 %conv.i.i, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit [
+ i32 3, label %land.lhs.true5.i
+ i32 6, label %land.lhs.true5.i37
+ ]
+
+land.lhs.true5.i: ; preds = %land.lhs.true.i
+ %call.i = call i32 @memcmp(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* %tmp6, i64 4) nounwind ; <i32> [#uses=1]
+ %cmp9.i = icmp eq i32 %call.i, 0 ; <i1> [#uses=1]
+ br i1 %cmp9.i, label %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit
+
+_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit: ; preds = %land.lhs.true5.i
+ br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit
+
+land.lhs.true5.i37: ; preds = %land.lhs.true.i
+ %call.i35 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str1, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1]
+ %cmp9.i36 = icmp eq i32 %call.i35, 0 ; <i1> [#uses=1]
+ br i1 %cmp9.i36, label %if.then.i40, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit
+
+if.then.i40: ; preds = %land.lhs.true5.i37
+ br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit
+
+_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i40, %land.lhs.true5.i37, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit, %land.lhs.true5.i, %land.lhs.true.i
+ %retval.0.i.pre159 = phi i32 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre161, %land.lhs.true5.i37 ], [ 2, %if.then.i40 ], [ %retval.0.i.pre161, %land.lhs.true5.i ], [ %retval.0.i.pre161, %land.lhs.true.i ] ; <i32> [#uses=2]
+ %tmp2.i44 = phi i8 [ 1, %_ZN12StringSwitchI5ColorE4CaseILj4EEERS1_RAT__KcRKS0_.exit ], [ 0, %land.lhs.true5.i37 ], [ 1, %if.then.i40 ], [ 0, %land.lhs.true5.i ], [ 0, %land.lhs.true.i ] ; <i8> [#uses=3]
+ %tobool.i46 = icmp eq i8 %tmp2.i44, 0 ; <i1> [#uses=1]
+ %cmp.i49 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1]
+ %or.cond = and i1 %tobool.i46, %cmp.i49 ; <i1> [#uses=1]
+ br i1 %or.cond, label %land.lhs.true5.i55, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60
+
+land.lhs.true5.i55: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit
+ %call.i53 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1]
+ %cmp9.i54 = icmp eq i32 %call.i53, 0 ; <i1> [#uses=1]
+ br i1 %cmp9.i54, label %if.then.i58, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60
+
+if.then.i58: ; preds = %land.lhs.true5.i55
+ br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60
+
+_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60: ; preds = %if.then.i58, %land.lhs.true5.i55, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit
+ %retval.0.i.pre158 = phi i32 [ %retval.0.i.pre159, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre159, %land.lhs.true5.i55 ], [ 3, %if.then.i58 ] ; <i32> [#uses=2]
+ %tmp2.i63 = phi i8 [ %tmp2.i44, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i44, %land.lhs.true5.i55 ], [ 1, %if.then.i58 ] ; <i8> [#uses=3]
+ %tmp14.i64 = and i8 %tmp2.i63, 1 ; <i8> [#uses=1]
+ %tobool.i65 = icmp eq i8 %tmp14.i64, 0 ; <i1> [#uses=1]
+ %cmp.i68 = icmp eq i32 %conv.i.i, 5 ; <i1> [#uses=1]
+ %or.cond168 = and i1 %tobool.i65, %cmp.i68 ; <i1> [#uses=1]
+ br i1 %or.cond168, label %land.lhs.true5.i74, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit
+
+land.lhs.true5.i74: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60
+ %call.i72 = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @.str3, i64 0, i64 0), i8* %tmp6, i64 6) nounwind ; <i32> [#uses=1]
+ %cmp9.i73 = icmp eq i32 %call.i72, 0 ; <i1> [#uses=1]
+ br i1 %cmp9.i73, label %if.then.i77, label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit
+
+if.then.i77: ; preds = %land.lhs.true5.i74
+ br label %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit
+
+_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i77, %land.lhs.true5.i74, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60
+ %retval.0.i.pre157 = phi i32 [ %retval.0.i.pre158, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %retval.0.i.pre158, %land.lhs.true5.i74 ], [ 4, %if.then.i77 ] ; <i32> [#uses=2]
+ %tmp2.i81 = phi i8 [ %tmp2.i63, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit60 ], [ %tmp2.i63, %land.lhs.true5.i74 ], [ 1, %if.then.i77 ] ; <i8> [#uses=3]
+ %tmp14.i82 = and i8 %tmp2.i81, 1 ; <i8> [#uses=1]
+ %tobool.i83 = icmp eq i8 %tmp14.i82, 0 ; <i1> [#uses=1]
+ %cmp.i86 = icmp eq i32 %conv.i.i, 4 ; <i1> [#uses=1]
+ %or.cond169 = and i1 %tobool.i83, %cmp.i86 ; <i1> [#uses=1]
+ br i1 %or.cond169, label %land.lhs.true5.i92, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit
+
+land.lhs.true5.i92: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit
+ %call.i90 = call i32 @memcmp(i8* getelementptr inbounds ([5 x i8]* @.str4, i64 0, i64 0), i8* %tmp6, i64 5) nounwind ; <i32> [#uses=1]
+ %cmp9.i91 = icmp eq i32 %call.i90, 0 ; <i1> [#uses=1]
+ br i1 %cmp9.i91, label %if.then.i95, label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit
+
+if.then.i95: ; preds = %land.lhs.true5.i92
+ br label %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit
+
+_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit: ; preds = %if.then.i95, %land.lhs.true5.i92, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit
+ %retval.0.i.pre156 = phi i32 [ %retval.0.i.pre157, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre157, %land.lhs.true5.i92 ], [ 5, %if.then.i95 ] ; <i32> [#uses=2]
+ %tmp2.i99 = phi i8 [ %tmp2.i81, %_ZN12StringSwitchI5ColorE4CaseILj6EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i81, %land.lhs.true5.i92 ], [ 1, %if.then.i95 ] ; <i8> [#uses=3]
+ %tmp14.i100 = and i8 %tmp2.i99, 1 ; <i8> [#uses=1]
+ %tobool.i101 = icmp eq i8 %tmp14.i100, 0 ; <i1> [#uses=1]
+ %cmp.i104 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1]
+ %or.cond170 = and i1 %tobool.i101, %cmp.i104 ; <i1> [#uses=1]
+ br i1 %or.cond170, label %land.lhs.true5.i110, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115
+
+land.lhs.true5.i110: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit
+ %call.i108 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1]
+ %cmp9.i109 = icmp eq i32 %call.i108, 0 ; <i1> [#uses=1]
+ br i1 %cmp9.i109, label %if.then.i113, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115
+
+if.then.i113: ; preds = %land.lhs.true5.i110
+ br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115
+
+_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115: ; preds = %if.then.i113, %land.lhs.true5.i110, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit
+ %retval.0.i.pre155 = phi i32 [ %retval.0.i.pre156, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %retval.0.i.pre156, %land.lhs.true5.i110 ], [ 6, %if.then.i113 ] ; <i32> [#uses=2]
+ %tmp2.i118 = phi i8 [ %tmp2.i99, %_ZN12StringSwitchI5ColorE4CaseILj5EEERS1_RAT__KcRKS0_.exit ], [ %tmp2.i99, %land.lhs.true5.i110 ], [ 1, %if.then.i113 ] ; <i8> [#uses=3]
+ %tmp14.i119 = and i8 %tmp2.i118, 1 ; <i8> [#uses=1]
+ %tobool.i120 = icmp eq i8 %tmp14.i119, 0 ; <i1> [#uses=1]
+ %cmp.i123 = icmp eq i32 %conv.i.i, 6 ; <i1> [#uses=1]
+ %or.cond171 = and i1 %tobool.i120, %cmp.i123 ; <i1> [#uses=1]
+ br i1 %or.cond171, label %land.lhs.true5.i129, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134
+
+land.lhs.true5.i129: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115
+ %call.i127 = call i32 @memcmp(i8* getelementptr inbounds ([7 x i8]* @.str6, i64 0, i64 0), i8* %tmp6, i64 7) nounwind ; <i32> [#uses=1]
+ %cmp9.i128 = icmp eq i32 %call.i127, 0 ; <i1> [#uses=1]
+ br i1 %cmp9.i128, label %if.then.i132, label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134
+
+if.then.i132: ; preds = %land.lhs.true5.i129
+ br label %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134
+
+_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134: ; preds = %if.then.i132, %land.lhs.true5.i129, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115
+ %retval.0.i.pre = phi i32 [ %retval.0.i.pre155, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %retval.0.i.pre155, %land.lhs.true5.i129 ], [ 7, %if.then.i132 ] ; <i32> [#uses=2]
+ %tmp2.i137 = phi i8 [ %tmp2.i118, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit115 ], [ %tmp2.i118, %land.lhs.true5.i129 ], [ 1, %if.then.i132 ] ; <i8> [#uses=1]
+ %tmp7.i138 = and i8 %tmp2.i137, 1 ; <i8> [#uses=1]
+ %tobool.i139 = icmp eq i8 %tmp7.i138, 0 ; <i1> [#uses=1]
+ %retval.0.i = select i1 %tobool.i139, i32 0, i32 %retval.0.i.pre ; <i32> [#uses=1]
+ %call22 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str7, i64 0, i64 0), i32 %retval.0.i) ; <i32> [#uses=0]
+ %exitcond = icmp eq i64 %tmp146, %tmp145 ; <i1> [#uses=1]
+ br i1 %exitcond, label %for.end, label %land.lhs.true.i
+
+for.end: ; preds = %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134, %entry
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) nounwind readonly
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 503d301892ee..cd274e78c9fc 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -147,11 +147,17 @@ define i32 @test6(i32 %A) {
; CHECK: @test6
%tmp455 = icmp eq i32 %A, 42
br i1 %tmp455, label %BB1, label %BB2
-
-BB2:
+
+; CHECK: call i32 @f2()
+; CHECK-NEXT: ret i32 3
+
; CHECK: call i32 @f1()
-; CHECK-NEXT: call void @f3()
-; CHECK-NEXT: ret i32 4
+; CHECK-NOT: br
+; CHECK: call void @f3()
+; CHECK-NOT: br
+; CHECK: ret i32 4
+
+BB2:
call i32 @f1()
br label %BB1
@@ -415,4 +421,58 @@ F2:
; CHECK-NEXT: br i1 %N, label %T2, label %F2
}
+; CHECK: @test14
+define i32 @test14(i32 %in) {
+entry:
+ %A = icmp eq i32 %in, 0
+; CHECK: br i1 %A, label %right_ret, label %merge
+ br i1 %A, label %left, label %right
+
+; CHECK-NOT: left:
+left:
+ br label %merge
+
+; CHECK-NOT: right:
+right:
+ %B = call i32 @f1()
+ br label %merge
+
+merge:
+; CHECK-NOT: %C = phi i32 [%in, %left], [%B, %right]
+ %C = phi i32 [%in, %left], [%B, %right]
+ %D = add i32 %C, 1
+ %E = icmp eq i32 %D, 2
+ br i1 %E, label %left_ret, label %right_ret
+
+; CHECK: left_ret:
+left_ret:
+ ret i32 0
+
+right_ret:
+ ret i32 1
+}
+
+; PR5652
+; CHECK: @test15
+define i32 @test15(i32 %len) {
+entry:
+; CHECK: icmp ult i32 %len, 13
+ %tmp = icmp ult i32 %len, 13
+ br i1 %tmp, label %check, label %exit0
+
+exit0:
+ ret i32 0
+
+check:
+ %tmp9 = icmp ult i32 %len, 21
+ br i1 %tmp9, label %exit1, label %exit2
+
+exit2:
+; CHECK-NOT: ret i32 2
+ ret i32 2
+
+exit1:
+ ret i32 1
+; CHECK: }
+}
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index f0fc61e7370f..751bc6518a1a 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -216,6 +216,9 @@ bb61:
; PR5698
define void @test7(i32 %x) {
+entry:
+ br label %tailrecurse
+
tailrecurse:
switch i32 %x, label %return [
i32 2, label %bb2
@@ -433,4 +436,51 @@ for.cond1040: ; preds = %for.body1044, %for.
ret void
}
+; PR7755
+define void @test16(i1 %c, i1 %c2, i1 %c3, i1 %c4) nounwind ssp {
+entry:
+ %cmp = icmp sgt i32 undef, 1 ; <i1> [#uses=1]
+ br i1 %c, label %land.end, label %land.rhs
+
+land.rhs: ; preds = %entry
+ br i1 %c2, label %lor.lhs.false.i, label %land.end
+
+lor.lhs.false.i: ; preds = %land.rhs
+ br i1 %c3, label %land.end, label %land.end
+
+land.end:
+ %0 = phi i1 [ true, %entry ], [ false, %land.rhs ], [false, %lor.lhs.false.i], [false, %lor.lhs.false.i] ; <i1> [#uses=1]
+ %cmp12 = and i1 %cmp, %0
+ %xor1 = xor i1 %cmp12, %c4
+ br i1 %xor1, label %if.then, label %if.end
+
+if.then:
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test17() {
+entry:
+ br i1 undef, label %bb269.us.us, label %bb269.us.us.us
+
+bb269.us.us.us:
+ %indvar = phi i64 [ %indvar.next, %bb287.us.us.us ], [ 0, %entry ]
+ %0 = icmp eq i16 undef, 0
+ br i1 %0, label %bb287.us.us.us, label %bb286.us.us.us
+
+bb287.us.us.us:
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, 4
+ br i1 %exitcond, label %bb288.bb289.loopexit_crit_edge, label %bb269.us.us.us
+bb286.us.us.us:
+ unreachable
+
+bb269.us.us:
+ unreachable
+
+bb288.bb289.loopexit_crit_edge:
+ unreachable
+}
diff --git a/test/Transforms/JumpThreading/lvi-load.ll b/test/Transforms/JumpThreading/lvi-load.ll
new file mode 100644
index 000000000000..0bf4187d544b
--- /dev/null
+++ b/test/Transforms/JumpThreading/lvi-load.ll
@@ -0,0 +1,49 @@
+; RUN: opt -S -jump-threading -enable-jump-threading-lvi -dce < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.4"
+
+%"struct.llvm::PATypeHolder" = type { %"struct.llvm::Type"* }
+%"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" = type { i64 }
+%"struct.llvm::Type" = type opaque
+%"struct.llvm::Use" = type { %"struct.llvm::Value"*, %"struct.llvm::Use"*, %"struct.llvm::PointerIntPair<llvm::Use**,2u,llvm::Use::PrevPtrTag,llvm::PointerLikeTypeTraits<llvm::Use**> >" }
+%"struct.llvm::Value" = type { i32 (...)**, i8, i8, i16, %"struct.llvm::PATypeHolder", %"struct.llvm::Use"*, %"struct.llvm::ValueName"* }
+%"struct.llvm::ValueName" = type opaque
+
+@_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__ = internal constant [5 x i8] c"cast\00", align 8 ; <[5 x i8]*> [#uses=1]
+@.str = private constant [31 x i8] c"include/llvm/Support/Casting.h\00", align 8 ; <[31 x i8]*> [#uses=1]
+@.str1 = private constant [59 x i8] c"isa<X>(Val) && \22cast<Ty>() argument of incompatible type!\22\00", align 8 ; <[59 x i8]*> [#uses=1]
+
+; CHECK: Z3fooPN4llvm5ValueE
+define zeroext i8 @_Z3fooPN4llvm5ValueE(%"struct.llvm::Value"* %V) ssp {
+entry:
+ %0 = getelementptr inbounds %"struct.llvm::Value"* %V, i64 0, i32 1 ; <i8*> [#uses=1]
+ %1 = load i8* %0, align 8 ; <i8> [#uses=2]
+ %2 = icmp ugt i8 %1, 20 ; <i1> [#uses=1]
+ br i1 %2, label %bb.i, label %bb2
+
+bb.i: ; preds = %entry
+ %toBoolnot.i.i = icmp ult i8 %1, 21 ; <i1> [#uses=1]
+ br i1 %toBoolnot.i.i, label %bb6.i.i, label %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
+
+; CHECK-NOT: assert
+bb6.i.i: ; preds = %bb.i
+ tail call void @__assert_rtn(i8* getelementptr inbounds ([5 x i8]* @_ZZN4llvm4castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_E8__func__, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8]* @.str, i64 0, i64 0), i32 202, i8* getelementptr inbounds ([59 x i8]* @.str1, i64 0, i64 0)) noreturn
+ unreachable
+
+_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit: ; preds = %bb.i
+; CHECK-NOT: null
+ %3 = icmp eq %"struct.llvm::Value"* %V, null ; <i1> [#uses=1]
+ br i1 %3, label %bb2, label %bb
+
+bb: ; preds = %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
+ tail call void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"* %V)
+; CHECK: ret
+ ret i8 1
+
+bb2: ; preds = %entry, %_ZN4llvm8dyn_castINS_11InstructionEPNS_5ValueEEENS_10cast_rettyIT_T0_E8ret_typeERKS6_.exit
+ ret i8 0
+}
+
+declare void @__assert_rtn(i8*, i8*, i32, i8*) noreturn
+
+declare void @_ZNK4llvm5Value4dumpEv(%"struct.llvm::Value"*)
diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
index 7545641f1aee..5381c88aea63 100644
--- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
+++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s -lcssa -S | \
+; RUN: opt < %s -loopsimplify -lcssa -S | \
; RUN: grep {%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry}
-; RUN: opt < %s -lcssa -S | \
+; RUN: opt < %s -loopsimplify -lcssa -S | \
; RUN: grep {%%SJE.0.0.lcssa1 = phi .struct.SetJmpMapEntry}
%struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
new file mode 100644
index 000000000000..88be5c41ccc5
--- /dev/null
+++ b/test/Transforms/LICM/crash.ll
@@ -0,0 +1,61 @@
+; RUN: opt -licm %s -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+
+; PR8068
+@g_12 = external global i8, align 1
+define void @test1() nounwind ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.cond, %bb.nph
+ store i8 0, i8* @g_12, align 1
+ %tmp6 = load i8* @g_12, align 1
+ br label %for.cond
+
+for.cond: ; preds = %for.body
+ store i8 %tmp6, i8* @g_12, align 1
+ br i1 false, label %for.cond.for.end10_crit_edge, label %for.body
+
+for.cond.for.end10_crit_edge: ; preds = %for.cond
+ br label %for.end10
+
+for.end10: ; preds = %for.cond.for.end10_crit_edge, %entry
+ ret void
+}
+
+; PR8067
+@g_8 = external global i32, align 4
+
+define void @test2() noreturn nounwind ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %tmp7 = load i32* @g_8, align 4
+ store i32* @g_8, i32** undef, align 16
+ store i32 undef, i32* @g_8, align 4
+ br label %for.body
+}
+
+; PR8102
+define void @test3() {
+entry:
+ %__first = alloca { i32* }
+ br i1 undef, label %for.cond, label %for.end
+
+for.cond: ; preds = %for.cond, %entry
+ %tmp1 = getelementptr { i32*}* %__first, i32 0, i32 0
+ %tmp2 = load i32** %tmp1, align 4
+ %call = tail call i32* @test3helper(i32* %tmp2)
+ %tmp3 = getelementptr { i32*}* %__first, i32 0, i32 0
+ store i32* %call, i32** %tmp3, align 4
+ br i1 false, label %for.cond, label %for.end
+
+for.end: ; preds = %for.cond, %entry
+ ret void
+}
+
+declare i32* @test3helper(i32*)
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index e7d36afb91b1..6f28d53af66e 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -48,3 +48,19 @@ Out: ; preds = %Loop
%C = sub i32 %A, %B ; <i32> [#uses=1]
ret i32 %C
}
+
+
+; This loop invariant instruction should be constant folded, not hoisted.
+define i32 @test3(i1 %c) {
+; CHECK: define i32 @test3
+; CHECK: call void @foo2(i32 6)
+ %A = load i32* @X ; <i32> [#uses=2]
+ br label %Loop
+Loop:
+ %B = add i32 4, 2 ; <i32> [#uses=2]
+ call void @foo2( i32 %B )
+ br i1 %c, label %Loop, label %Out
+Out: ; preds = %Loop
+ %C = sub i32 %A, %B ; <i32> [#uses=1]
+ ret i32 %C
+}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index ef28c38ca607..c1d2b24b0bba 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -1,4 +1,6 @@
; RUN: opt < %s -licm -S | FileCheck %s
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
+
@X = global i32 7 ; <i32*> [#uses=4]
define void @test1(i32 %i) {
@@ -32,23 +34,21 @@ Entry:
br label %Loop
; CHECK: @test2
; CHECK: Entry:
-; CHECK-NEXT: %X1 = getelementptr i32* @X, i64 0
-; CHECK-NEXT: %X2 = getelementptr i32* @X, i64 0
-; CHECK-NEXT: %X1.promoted = load i32* %X1
+; CHECK-NEXT: %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1)
; CHECK-NEXT: br label %Loop
Loop: ; preds = %Loop, %0
- %X1 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1]
+ %X1 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1]
%A = load i32* %X1 ; <i32> [#uses=1]
%V = add i32 %A, 1 ; <i32> [#uses=1]
- %X2 = getelementptr i32* @X, i64 0 ; <i32*> [#uses=1]
+ %X2 = getelementptr i32* @X, i64 1 ; <i32*> [#uses=1]
store i32 %V, i32* %X2
br i1 false, label %Loop, label %Exit
Exit: ; preds = %Loop
ret void
; CHECK: Exit:
-; CHECK-NEXT: store i32 %V, i32* %X1
+; CHECK-NEXT: store i32 %V, i32* getelementptr inbounds (i32* @X, i64 1)
; CHECK-NEXT: ret void
}
@@ -71,3 +71,50 @@ Out: ; preds = %Loop
ret void
}
+; PR8041
+define void @test4(i8* %x, i8 %n) {
+; CHECK: @test4
+ %handle1 = alloca i8*
+ %handle2 = alloca i8*
+ store i8* %x, i8** %handle1
+ br label %loop
+
+loop:
+ %tmp = getelementptr i8* %x, i64 8
+ store i8* %tmp, i8** %handle2
+ br label %subloop
+
+subloop:
+ %count = phi i8 [ 0, %loop ], [ %nextcount, %subloop ]
+ %offsetx2 = load i8** %handle2
+ store i8 %n, i8* %offsetx2
+ %newoffsetx2 = getelementptr i8* %offsetx2, i64 -1
+ store i8* %newoffsetx2, i8** %handle2
+ %nextcount = add i8 %count, 1
+ %innerexitcond = icmp sge i8 %nextcount, 8
+ br i1 %innerexitcond, label %innerexit, label %subloop
+
+; Should have promoted 'handle2' accesses.
+; CHECK: subloop:
+; CHECK-NEXT: phi i8* [
+; CHECK-NEXT: %count = phi i8 [
+; CHECK-NEXT: store i8 %n
+; CHECK-NOT: store
+; CHECK: br i1
+
+innerexit:
+ %offsetx1 = load i8** %handle1
+ %val = load i8* %offsetx1
+ %cond = icmp eq i8 %val, %n
+ br i1 %cond, label %exit, label %loop
+
+; Should not have promoted offsetx1 loads.
+; CHECK: innerexit:
+; CHECK: %val = load i8* %offsetx1
+; CHECK: %cond = icmp eq i8 %val, %n
+; CHECK: br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll
index 11112eb74443..68e4b64bf9bf 100644
--- a/test/Transforms/LICM/sinking.ll
+++ b/test/Transforms/LICM/sinking.ll
@@ -233,3 +233,17 @@ Out: ; preds = %Loop
; CHECK-NEXT: ret i32 %tmp.6
}
+; Should delete, not sink, dead instructions.
+define void @test11() {
+ br label %Loop
+Loop:
+ %dead = getelementptr %Ty* @X2, i64 0, i32 0
+ br i1 false, label %Loop, label %Out
+Out:
+ ret void
+; CHECK: @test11
+; CHECK: Out:
+; CHECK-NEXT: ret void
+}
+
+
diff --git a/test/Transforms/LoopRotate/phi-duplicate.ll b/test/Transforms/LoopRotate/phi-duplicate.ll
index 9a64e2a9a830..5403e723ee15 100644
--- a/test/Transforms/LoopRotate/phi-duplicate.ll
+++ b/test/Transforms/LoopRotate/phi-duplicate.ll
@@ -27,9 +27,21 @@ for.body: ; preds = %for.cond
for.end: ; preds = %for.cond
ret void
}
-; Should only end up with one phi.
-; CHECK: for.body:
-; CHECK-NEXT: %j.02 = phi i64
-; CHECK-NOT: phi
-; CHECK: ret void
+; Should only end up with one phi. Also, the original for.cond block should
+; be moved to the end of the loop so that the new loop header pleasantly
+; ends up at the top.
+
+; CHECK: define void @test
+; CHECK-NEXT: entry:
+; CHECK-NEXT: icmp slt i64
+; CHECK-NEXT: br i1
+; CHECK-NOT: :
+; CHECK: bb.nph:
+; CHECK-NEXT: br label %for.body
+; CHECK-NOT: :
+; CHECK: for.body:
+; CHECK-NEXT: %j.02 = phi i64
+; CHECK-NOT: phi
+; CHECK: ret void
+; CHECK-NEXT: }
diff --git a/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll
new file mode 100644
index 000000000000..2a1ee7d1a72f
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -domfrontier -loopsimplify -domfrontier -verify-dom-info -analyze
+
+
+define void @a() nounwind {
+entry:
+ br i1 undef, label %bb37, label %bb1.i
+
+bb1.i: ; preds = %bb1.i, %bb
+ %indvar = phi i64 [ %indvar.next, %bb1.i ], [ 0, %entry ] ; <i64> [#uses=1]
+ %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %indvar.next, 576 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb37, label %bb1.i
+
+bb37: ; preds = %bb1.i, %bb
+ br label %return
+
+
+return: ; preds = %bb39
+ ret void
+}
diff --git a/test/Transforms/LoopSimplify/indirectbr-backedge.ll b/test/Transforms/LoopSimplify/indirectbr-backedge.ll
new file mode 100644
index 000000000000..ca6e47fcecd3
--- /dev/null
+++ b/test/Transforms/LoopSimplify/indirectbr-backedge.ll
@@ -0,0 +1,35 @@
+; RUN: opt -loopsimplify -S < %s | FileCheck %s
+
+; LoopSimplify shouldn't split loop backedges that use indirectbr.
+
+; CHECK: bb1: ; preds = %bb5, %bb
+; CHECK-NEXT: indirectbr
+
+; CHECK: bb5: ; preds = %bb1
+; CHECK-NEXT: br label %bb1{{$}}
+
+define void @foo(i8* %p) nounwind {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb5, %bb1, %bb
+ indirectbr i8* %p, [label %bb6, label %bb7, label %bb1, label %bb2, label %bb3, label %bb5, label %bb4]
+
+bb2: ; preds = %bb1
+ ret void
+
+bb3: ; preds = %bb1
+ ret void
+
+bb4: ; preds = %bb1
+ ret void
+
+bb5: ; preds = %bb1
+ br label %bb1
+
+bb6: ; preds = %bb1
+ ret void
+
+bb7: ; preds = %bb1
+ ret void
+}
diff --git a/test/Transforms/LoopSimplify/preserve-scev.ll b/test/Transforms/LoopSimplify/preserve-scev.ll
new file mode 100644
index 000000000000..017a0d210849
--- /dev/null
+++ b/test/Transforms/LoopSimplify/preserve-scev.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S < %s -indvars | opt -analyze -iv-users | grep {%cmp = icmp slt i32} | grep {= \{%\\.ph,+,1\}<%for.cond>}
+; PR8079
+
+; LoopSimplify should invalidate indvars when splitting out the
+; inner loop.
+
+@maxStat = external global i32
+
+define i32 @test() nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %if.then5, %if.end, %entry
+ %cuts.1 = phi i32 [ 0, %entry ], [ %inc, %if.then5 ], [ %cuts.1, %if.end ]
+ %0 = phi i32 [ 0, %entry ], [ %add, %if.end ], [ %add, %if.then5 ]
+ %add = add i32 %0, 1
+ %cmp = icmp slt i32 %0, 1
+ %tmp1 = load i32* @maxStat, align 4
+ br i1 %cmp, label %for.body, label %for.cond14.preheader
+
+for.cond14.preheader: ; preds = %for.cond
+ %cmp1726 = icmp sgt i32 %tmp1, 0
+ br i1 %cmp1726, label %for.body18, label %return
+
+for.body: ; preds = %for.cond
+ %cmp2 = icmp sgt i32 %tmp1, 100
+ br i1 %cmp2, label %return, label %if.end
+
+if.end: ; preds = %for.body
+ %cmp4 = icmp sgt i32 %tmp1, -1
+ br i1 %cmp4, label %if.then5, label %for.cond
+
+if.then5: ; preds = %if.end
+ call void @foo() nounwind
+ %inc = add i32 %cuts.1, 1
+ br label %for.cond
+
+for.body18: ; preds = %for.body18, %for.cond14.preheader
+ %i13.027 = phi i32 [ %1, %for.body18 ], [ 0, %for.cond14.preheader ]
+ call void @foo() nounwind
+ %1 = add nsw i32 %i13.027, 1
+ %tmp16 = load i32* @maxStat, align 4
+ %cmp17 = icmp slt i32 %1, %tmp16
+ br i1 %cmp17, label %for.body18, label %return
+
+return: ; preds = %for.body18, %for.body, %for.cond14.preheader
+ ret i32 0
+}
+
+declare void @foo() nounwind
diff --git a/test/Transforms/LoopStrengthReduce/pr3571.ll b/test/Transforms/LoopStrengthReduce/pr3571.ll
index 9ad27d5ff114..a23e4db49705 100644
--- a/test/Transforms/LoopStrengthReduce/pr3571.ll
+++ b/test/Transforms/LoopStrengthReduce/pr3571.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -loop-reduce | llvm-dis
; PR3571
-target triple = "i386-mingw32"
+target triple = "i386-pc-mingw32"
define void @_ZNK18qdesigner_internal10TreeWidget12drawBranchesEP8QPainterRK5QRectRK11QModelIndex() nounwind {
entry:
br label %_ZNK11QModelIndex7isValidEv.exit.i
diff --git a/test/Transforms/LoopStrengthReduce/uglygep.ll b/test/Transforms/LoopStrengthReduce/uglygep.ll
index dca97e9ad187..8af5cf1dfd72 100644
--- a/test/Transforms/LoopStrengthReduce/uglygep.ll
+++ b/test/Transforms/LoopStrengthReduce/uglygep.ll
@@ -4,7 +4,6 @@
; should be able to form pretty GEPs.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
define void @Z4() nounwind {
bb:
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
new file mode 100644
index 000000000000..73391ca8d19d
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -0,0 +1,53 @@
+; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -loop-unswitch -simplifycfg -S < %s | FileCheck %s
+; PR5373
+
+; Loop unswitching shouldn't trivially unswitch the true case of condition %a
+; in the code here because it leads to an infinite loop. While this doesn't
+; contain any instructions with side effects, it's still a kind of side effect.
+; It can trivially unswitch on the false cas of condition %a though.
+
+; STATS: 2 loop-unswitch - Number of branches unswitched
+; STATS: 1 loop-unswitch - Number of unswitches that are trivial
+
+; CHECK: @func_16
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
+
+; CHECK: entry.split:
+; CHECK-NEXT: br i1 %b, label %cond.end.us, label %abort1
+
+; CHECK: cond.end.us:
+; CHECK-NEXT: br label %cond.end.us
+
+; CHECK: abort0.split:
+; CHECK-NEXT: call void @end0() noreturn nounwind
+; CHECK-NEXT: unreachable
+
+; CHECK: abort1:
+; CHECK-NEXT: call void @end1() noreturn nounwind
+; CHECK-NEXT: unreachable
+
+; CHECK: }
+
+define void @func_16(i1 %a, i1 %b) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ br i1 %a, label %cond.end, label %abort0
+
+cond.end:
+ br i1 %b, label %for.body, label %abort1
+
+abort0:
+ call void @end0() noreturn nounwind
+ unreachable
+
+abort1:
+ call void @end1() noreturn nounwind
+ unreachable
+}
+
+declare void @end0() noreturn
+declare void @end1() noreturn
diff --git a/test/Transforms/LowerAtomic/atomic-load.ll b/test/Transforms/LowerAtomic/atomic-load.ll
new file mode 100644
index 000000000000..5b110d6b7eba
--- /dev/null
+++ b/test/Transforms/LowerAtomic/atomic-load.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -loweratomic -S | FileCheck %s
+
+declare i8 @llvm.atomic.load.add.i8.p0i8(i8* %ptr, i8 %delta)
+declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* %ptr, i8 %delta)
+declare i8 @llvm.atomic.load.min.i8.p0i8(i8* %ptr, i8 %delta)
+
+define i8 @add() {
+; CHECK: @add
+ %i = alloca i8
+ %j = call i8 @llvm.atomic.load.add.i8.p0i8(i8* %i, i8 42)
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: add
+; CHECK-NEXT: store
+ ret i8 %j
+; CHECK: ret i8 [[INST]]
+}
+
+define i8 @nand() {
+; CHECK: @nand
+ %i = alloca i8
+ %j = call i8 @llvm.atomic.load.nand.i8.p0i8(i8* %i, i8 42)
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: and
+; CHECK-NEXT: xor
+; CHECK-NEXT: store
+ ret i8 %j
+; CHECK: ret i8 [[INST]]
+}
+
+define i8 @min() {
+; CHECK: @min
+ %i = alloca i8
+ %j = call i8 @llvm.atomic.load.min.i8.p0i8(i8* %i, i8 42)
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: store
+ ret i8 %j
+; CHECK: ret i8 [[INST]]
+}
diff --git a/test/Transforms/LowerAtomic/atomic-swap.ll b/test/Transforms/LowerAtomic/atomic-swap.ll
new file mode 100644
index 000000000000..0a59c8595e6a
--- /dev/null
+++ b/test/Transforms/LowerAtomic/atomic-swap.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -loweratomic -S | FileCheck %s
+
+declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %ptr, i8 %cmp, i8 %val)
+declare i8 @llvm.atomic.swap.i8.p0i8(i8* %ptr, i8 %val)
+
+define i8 @cmpswap() {
+; CHECK: @cmpswap
+ %i = alloca i8
+ %j = call i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* %i, i8 0, i8 42)
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: icmp
+; CHECK-NEXT: select
+; CHECK-NEXT: store
+ ret i8 %j
+; CHECK: ret i8 [[INST]]
+}
+
+define i8 @swap() {
+; CHECK: @swap
+ %i = alloca i8
+ %j = call i8 @llvm.atomic.swap.i8.p0i8(i8* %i, i8 42)
+; CHECK: [[INST:%[a-z0-9]+]] = load
+; CHECK-NEXT: store
+ ret i8 %j
+; CHECK: ret i8 [[INST]]
+}
diff --git a/test/Transforms/LowerAtomic/barrier.ll b/test/Transforms/LowerAtomic/barrier.ll
new file mode 100644
index 000000000000..218c5ba8d18e
--- /dev/null
+++ b/test/Transforms/LowerAtomic/barrier.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -loweratomic -S | FileCheck %s
+
+declare void @llvm.memory.barrier(i1 %ll, i1 %ls, i1 %sl, i1 %ss, i1 %device)
+
+define void @barrier() {
+; CHECK: @barrier
+ call void @llvm.memory.barrier(i1 0, i1 0, i1 0, i1 0, i1 0)
+; CHECK-NEXT: ret
+ ret void
+}
diff --git a/test/Transforms/SSI/dg.exp b/test/Transforms/LowerAtomic/dg.exp
index f2005891a59a..f2005891a59a 100644
--- a/test/Transforms/SSI/dg.exp
+++ b/test/Transforms/LowerAtomic/dg.exp
diff --git a/test/Transforms/MergeFunc/vectors-and-arrays.ll b/test/Transforms/MergeFunc/vectors-and-arrays.ll
new file mode 100644
index 000000000000..dc64a0858ba8
--- /dev/null
+++ b/test/Transforms/MergeFunc/vectors-and-arrays.ll
@@ -0,0 +1,18 @@
+; RUN: opt -mergefunc < %s -disable-output -stats | not grep merged
+; This used to crash with an assert.
+
+define <2 x i8> @v1(<2 x i8> %x) {
+ ret <2 x i8> %x
+}
+
+define <4 x i8> @v2(<4 x i8> %x) {
+ ret <4 x i8> %x
+}
+
+define [2 x i8] @a1([2 x i8] %x) {
+ ret [2 x i8] %x
+}
+
+define [4 x i8] @a2([4 x i8] %x) {
+ ret [4 x i8] %x
+}
diff --git a/test/Transforms/PartialSpecialize/two-specializations.ll b/test/Transforms/PartialSpecialize/two-specializations.ll
index c85ddb78dd1a..bc3da22e1855 100644
--- a/test/Transforms/PartialSpecialize/two-specializations.ll
+++ b/test/Transforms/PartialSpecialize/two-specializations.ll
@@ -1,8 +1,8 @@
; If there are two specializations of a function, make sure each callsite
; calls the right one.
;
-; RN: opt -S -partialspecialization %s | FileCheck %s
-; RUN: true
+; RUN: opt -S -partialspecialization -disable-inlining %s | opt -S -inline | FileCheck %s -check-prefix=CORRECT
+; RUN: opt -S -partialspecialization -disable-inlining %s | FileCheck %s
declare void @callback1()
declare void @callback2()
@@ -14,14 +14,18 @@ define internal void @UseCallback(void()* %pCallback) {
define void @foo(void()* %pNonConstCallback)
{
Entry:
+; CORRECT: Entry
+; CORRECT-NEXT: call void @callback1()
+; CORRECT-NEXT: call void @callback1()
+; CORRECT-NEXT: call void @callback2()
+; CORRECT-NEXT: call void %pNonConstCallback()
+; CORRECT-NEXT: call void @callback1()
+; CORRECT-NEXT: call void @callback2()
+; CORRECT-NEXT: call void @callback2()
; CHECK: Entry
-; CHECK-NEXT: call void @callback1()
-; CHECK-NEXT: call void @callback1()
-; CHECK-NEXT: call void @callback2()
-; CHECK-NEXT: call void %pNonConstCallback()
-; CHECK-NEXT: call void @callback1()
-; CHECK-NEXT: call void @callback2()
-; CHECK-NEXT: call void @callback2()
+; CHECK-NOT: call void @UseCallback(void ()* @callback1)
+; CHECK-NOT: call void @UseCallback(void ()* @callback2)
+; CHECK: ret void
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback1)
call void @UseCallback(void()* @callback2)
diff --git a/test/Transforms/SCCP/ipsccp-addr-taken.ll b/test/Transforms/SCCP/ipsccp-addr-taken.ll
new file mode 100644
index 000000000000..c6572fa5d141
--- /dev/null
+++ b/test/Transforms/SCCP/ipsccp-addr-taken.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -ipsccp -S | FileCheck %s
+; PR7876
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+define internal i32 @foo() nounwind noinline ssp {
+entry:
+ ret i32 0
+; CHECK: @foo
+; CHECK: entry:
+; CHECK: ret i32 0
+}
+
+declare i32 @bar()
+
+define internal i32 @test(i32 %c) nounwind noinline ssp {
+bb:
+ %tmp1 = icmp ne i32 %c, 0 ; <i1> [#uses=1]
+ %tmp2 = select i1 %tmp1, i32 ()* @foo, i32 ()* @bar ; <i32 ()*> [#uses=1]
+ %tmp3 = tail call i32 %tmp2() nounwind ; <i32> [#uses=1]
+ ret i32 %tmp3
+}
+
+define i32 @main() nounwind ssp {
+bb:
+ %tmp = tail call i32 @test(i32 1) ; <i32> [#uses=1]
+ ret i32 %tmp
+}
diff --git a/test/Transforms/SSI/2009-07-09-Invoke.ll b/test/Transforms/SSI/2009-07-09-Invoke.ll
deleted file mode 100644
index 20a22172806e..000000000000
--- a/test/Transforms/SSI/2009-07-09-Invoke.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: opt < %s -ssi-everything -disable-output
-; PR4511
-
- %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" }
- %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >::_Vector_impl" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* }
- %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
- %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
- %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" }
- %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 }
- %"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" = type { %"struct.std::_Vector_base<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >" }
-
-declare void @_Unwind_Resume(i8*)
-
-declare fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*)
-
-define fastcc void @_ZNSt6vectorISsSaISsEE9push_backERKSs(%"struct.std::vector<std::basic_string<char, std::char_traits<char>, std::allocator<char> >,std::allocator<std::basic_string<char, std::char_traits<char>, std::allocator<char> > > >"* nocapture %this, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* nocapture %__x) {
-entry:
- br i1 undef, label %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i, label %bb
-
-bb: ; preds = %entry
- ret void
-
-_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i: ; preds = %entry
- %0 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef)
- to label %invcont14.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=3]
-
-invcont14.i: ; preds = %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i
- %1 = icmp eq %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, null ; <i1> [#uses=1]
- br i1 %1, label %bb19.i, label %bb.i17.i
-
-bb.i17.i: ; preds = %invcont14.i
- %2 = invoke fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* undef, i32 0)
- to label %bb2.i25.i unwind label %ppad.i.i.i23.i ; <i8*> [#uses=0]
-
-ppad.i.i.i23.i: ; preds = %bb.i17.i
- invoke void @_Unwind_Resume(i8* undef)
- to label %.noexc.i24.i unwind label %lpad.i29.i
-
-.noexc.i24.i: ; preds = %ppad.i.i.i23.i
- unreachable
-
-bb2.i25.i: ; preds = %bb.i17.i
- unreachable
-
-lpad.i29.i: ; preds = %ppad.i.i.i23.i
- invoke void @_Unwind_Resume(i8* undef)
- to label %.noexc.i9 unwind label %ppad81.i
-
-.noexc.i9: ; preds = %lpad.i29.i
- unreachable
-
-bb19.i: ; preds = %invcont14.i
- %3 = getelementptr %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %0, i32 1 ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=2]
- %4 = invoke fastcc %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* @_ZSt24__uninitialized_copy_auxIPSsS0_ET0_T_S2_S1_St12__false_type(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* undef, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %3)
- to label %invcont20.i unwind label %ppad81.i ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0]
-
-invcont20.i: ; preds = %bb19.i
- unreachable
-
-invcont32.i: ; preds = %ppad81.i
- unreachable
-
-ppad81.i: ; preds = %bb19.i, %lpad.i29.i, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i
- %__new_finish.0.i = phi %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* [ %0, %lpad.i29.i ], [ undef, %_ZNSt12_Vector_baseISsSaISsEE11_M_allocateEj.exit.i ], [ %3, %bb19.i ] ; <%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"*> [#uses=0]
- br i1 undef, label %invcont32.i, label %bb.i.i.i.i
-
-bb.i.i.i.i: ; preds = %bb.i.i.i.i, %ppad81.i
- br label %bb.i.i.i.i
-}
-
-declare fastcc i8* @_ZNSs4_Rep8_M_cloneERKSaIcEj(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep"* nocapture, i32)
diff --git a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll b/test/Transforms/SSI/2009-08-15-UnreachableBB.ll
deleted file mode 100644
index 0fe37ec74098..000000000000
--- a/test/Transforms/SSI/2009-08-15-UnreachableBB.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -ssi-everything -disable-output
-
-declare fastcc i32 @ras_Empty(i8** nocapture) nounwind readonly
-
-define i32 @cc_Tautology() nounwind {
-entry:
- unreachable
-
-cc_InitData.exit: ; No predecessors!
- %0 = call fastcc i32 @ras_Empty(i8** undef) nounwind ; <i32> [#uses=1]
- %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
- br i1 %1, label %bb2, label %bb6
-
-bb2: ; preds = %cc_InitData.exit
- unreachable
-
-bb6: ; preds = %cc_InitData.exit
- ret i32 undef
-}
diff --git a/test/Transforms/SSI/2009-08-17-CritEdge.ll b/test/Transforms/SSI/2009-08-17-CritEdge.ll
deleted file mode 100644
index 61bd2dc693f4..000000000000
--- a/test/Transforms/SSI/2009-08-17-CritEdge.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -ssi-everything -disable-output
-
-define void @test(i32 %x) {
-entry:
- br label %label1
-label1:
- %A = phi i32 [ 0, %entry ], [ %A.1, %label2 ]
- %B = icmp slt i32 %A, %x
- br i1 %B, label %label2, label %label2
-label2:
- %A.1 = add i32 %A, 1
- br label %label1
-label3: ; No predecessors!
- ret void
-}
diff --git a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll b/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll
deleted file mode 100644
index 64bed191def0..000000000000
--- a/test/Transforms/SSI/2009-08-19-UnreachableBB2.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -ssi-everything -disable-output
-
-define void @foo() {
-entry:
- %tmp0 = load i64* undef, align 4 ; <i64> [#uses=3]
- br i1 undef, label %end_stmt_playback, label %bb16
-
-readJournalHdr.exit: ; No predecessors!
- br label %end_stmt_playback
-
-bb16: ; preds = %bb7
- %tmp1 = icmp slt i64 0, %tmp0 ; <i1> [#uses=1]
- br i1 %tmp1, label %bb16, label %bb17
-
-bb17: ; preds = %bb16
- store i64 %tmp0, i64* undef, align 4
- br label %end_stmt_playback
-
-end_stmt_playback: ; preds = %bb17, %readJournalHdr.exit, %bb6, %bb2
- store i64 %tmp0, i64* undef, align 4
- ret void
-}
diff --git a/test/Transforms/SSI/ssiphi.ll b/test/Transforms/SSI/ssiphi.ll
deleted file mode 100644
index a42b70c3c021..000000000000
--- a/test/Transforms/SSI/ssiphi.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -ssi-everything -S | FileCheck %s
-
-declare void @use(i32)
-declare i32 @create()
-
-define i32 @foo() {
-entry:
- %x = call i32 @create()
- %y = icmp slt i32 %x, 10
- br i1 %y, label %T, label %F
-T:
-; CHECK: SSI_sigma
- call void @use(i32 %x)
- br label %join
-F:
-; CHECK: SSI_sigma
- call void @use(i32 %x)
- br label %join
-join:
-; CHECK: SSI_phi
- ret i32 %x
-}
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
index 4f875b0841b2..fe55426b24a1 100644
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/test/Transforms/ScalarRepl/vector_promote.ll
@@ -1,8 +1,8 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl -S | grep {load <4 x float>}
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "x86_64-apple-darwin10.0.0"
-define void @test(<4 x float>* %F, float %f) {
+define void @test1(<4 x float>* %F, float %f) {
entry:
%G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
@@ -14,6 +14,11 @@ entry:
%tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
+; CHECK: @test1
+; CHECK-NOT: alloca
+; CHECK: %tmp = load <4 x float>* %F
+; CHECK: fadd <4 x float> %tmp, %tmp
+; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0
}
define void @test2(<4 x float>* %F, float %f) {
@@ -28,6 +33,11 @@ entry:
%tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
store <4 x float> %tmp6, <4 x float>* %F
ret void
+; CHECK: @test2
+; CHECK-NOT: alloca
+; CHECK: %tmp = load <4 x float>* %F
+; CHECK: fadd <4 x float> %tmp, %tmp
+; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2
}
define void @test3(<4 x float>* %F, float* %f) {
@@ -40,6 +50,11 @@ entry:
%tmp.upgrd.4 = load float* %tmp.upgrd.3 ; <float> [#uses=1]
store float %tmp.upgrd.4, float* %f
ret void
+; CHECK: @test3
+; CHECK-NOT: alloca
+; CHECK: %tmp = load <4 x float>* %F
+; CHECK: fadd <4 x float> %tmp, %tmp
+; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2
}
define void @test4(<4 x float>* %F, float* %f) {
@@ -52,6 +67,11 @@ entry:
%tmp.upgrd.6 = load float* %G.upgrd.5 ; <float> [#uses=1]
store float %tmp.upgrd.6, float* %f
ret void
+; CHECK: @test4
+; CHECK-NOT: alloca
+; CHECK: %tmp = load <4 x float>* %F
+; CHECK: fadd <4 x float> %tmp, %tmp
+; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0
}
define i32 @test5(float %X) { ;; should turn into bitcast.
@@ -61,5 +81,22 @@ define i32 @test5(float %X) { ;; should turn into bitcast.
%a = bitcast float* %X1 to i32*
%tmp = load i32* %a
ret i32 %tmp
+; CHECK: @test5
+; CHECK-NEXT: bitcast float %X to i32
+; CHECK-NEXT: ret i32
+}
+
+
+;; should not turn into <1 x i64> - It is a banned MMX datatype.
+;; rdar://8380055
+define i64 @test6(<2 x float> %X) {
+ %X_addr = alloca <2 x float>
+ store <2 x float> %X, <2 x float>* %X_addr
+ %P = bitcast <2 x float>* %X_addr to i64*
+ %tmp = load i64* %P
+ ret i64 %tmp
+; CHECK: @test6
+; CHECK-NEXT: bitcast <2 x float> %X to i64
+; CHECK-NEXT: ret i64
}
diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
index ba33d84f84aa..9c15efccd275 100644
--- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
+++ b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -simplifycfg -disable-output
; PR2256
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-mingw32"
+target triple = "x86_64-pc-mingw32"
define { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval %Z, i1 %cond) nounwind {
bb: ; preds = %entry
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index 83a9fa7ad1b8..7315ff66bd12 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -54,6 +54,5 @@ bb1: ; preds = %entry
return: ; preds = %entry
ret void
; CHECK: @test5
-; CHECK-NEXT: bb:
; CHECK-NEXT: ret void
}
diff --git a/test/Transforms/SimplifyCFG/indirectbr.ll b/test/Transforms/SimplifyCFG/indirectbr.ll
new file mode 100644
index 000000000000..de4f5b607551
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/indirectbr.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+; SimplifyCFG should eliminate redundant indirectbr edges.
+
+; CHECK: indbrtest0
+; CHECK: indirectbr i8* %t, [label %BB0, label %BB1, label %BB2]
+; CHECK: %x = phi i32 [ 0, %BB0 ], [ 1, %entry ]
+
+declare void @foo()
+declare void @A()
+declare void @B(i32)
+declare void @C()
+
+define void @indbrtest0(i8** %P, i8** %Q) {
+entry:
+ store i8* blockaddress(@indbrtest0, %BB0), i8** %P
+ store i8* blockaddress(@indbrtest0, %BB1), i8** %P
+ store i8* blockaddress(@indbrtest0, %BB2), i8** %P
+ call void @foo()
+ %t = load i8** %Q
+ indirectbr i8* %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2]
+BB0:
+ call void @A()
+ br label %BB1
+BB1:
+ %x = phi i32 [ 0, %BB0 ], [ 1, %entry ], [ 1, %entry ]
+ call void @B(i32 %x)
+ ret void
+BB2:
+ call void @C()
+ ret void
+}
+
+; SimplifyCFG should convert the indirectbr into a directbr. It would be even
+; better if it removed the branch altogether, but simplifycfdg currently misses
+; that because the predecessor is the entry block.
+
+; CHECK: indbrtest1
+; CHECK: br label %BB0
+
+define void @indbrtest1(i8** %P, i8** %Q) {
+entry:
+ store i8* blockaddress(@indbrtest1, %BB0), i8** %P
+ call void @foo()
+ %t = load i8** %Q
+ indirectbr i8* %t, [label %BB0, label %BB0]
+BB0:
+ call void @A()
+ ret void
+}
+
+; SimplifyCFG should notice that BB0 does not have its address taken and
+; remove it from entry's successor list.
+
+; CHECK: indbrtest2
+; CHECK: entry:
+; CHECK-NEXT: unreachable
+
+define void @indbrtest2(i8* %t) {
+entry:
+ indirectbr i8* %t, [label %BB0, label %BB0]
+BB0:
+ ret void
+}
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
new file mode 100644
index 000000000000..3965c3782276
--- /dev/null
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -0,0 +1,19 @@
+; RUN: opt -strip-dead-debug-info -disable-output %s
+define i32 @foo() nounwind ssp {
+entry:
+ ret i32 0, !dbg !8
+}
+
+!llvm.dbg.sp = !{!0}
+!llvm.dbg.gv = !{!6}
+
+!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo} ; [ DW_TAG_subprogram ]
+!1 = metadata !{i32 524329, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !2} ; [ DW_TAG_file_type ]
+!2 = metadata !{i32 524305, i32 0, i32 12, metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW", metadata !"clang version 2.8 (trunk 112062)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0} ; [ DW_TAG_variable ]
+!7 = metadata !{i32 524326, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ]
+!8 = metadata !{i32 3, i32 13, metadata !9, null}
+!9 = metadata !{i32 524299, metadata !0, i32 3, i32 11, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll
index b2a9ed2813d6..9475f87e8f5b 100644
--- a/test/Transforms/TailCallElim/accum_recursion.ll
+++ b/test/Transforms/TailCallElim/accum_recursion.ll
@@ -1,15 +1,74 @@
-; RUN: opt < %s -tailcallelim -S | not grep call
+; RUN: opt < %s -tailcallelim -S | FileCheck %s
-define i32 @factorial(i32 %x) {
+define i32 @test1_factorial(i32 %x) {
entry:
%tmp.1 = icmp sgt i32 %x, 0 ; <i1> [#uses=1]
br i1 %tmp.1, label %then, label %else
then: ; preds = %entry
%tmp.6 = add i32 %x, -1 ; <i32> [#uses=1]
- %tmp.4 = call i32 @factorial( i32 %tmp.6 ) ; <i32> [#uses=1]
+ %tmp.4 = call i32 @test1_factorial( i32 %tmp.6 ) ; <i32> [#uses=1]
%tmp.7 = mul i32 %tmp.4, %x ; <i32> [#uses=1]
ret i32 %tmp.7
else: ; preds = %entry
ret i32 1
}
+; CHECK: define i32 @test1_factorial
+; CHECK: phi i32
+; CHECK-NOT: call i32
+; CHECK: else:
+
+; This is a more aggressive form of accumulator recursion insertion, which
+; requires noticing that X doesn't change as we perform the tailcall.
+
+define i32 @test2_mul(i32 %x, i32 %y) {
+entry:
+ %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1]
+ br i1 %tmp.1, label %return, label %endif
+endif: ; preds = %entry
+ %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1]
+ %tmp.5 = call i32 @test2_mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1]
+ %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1]
+ ret i32 %tmp.9
+return: ; preds = %entry
+ ret i32 %x
+}
+
+; CHECK: define i32 @test2_mul
+; CHECK: phi i32
+; CHECK-NOT: call i32
+; CHECK: return:
+
+
+define i64 @test3_fib(i64 %n) nounwind readnone {
+; CHECK: @test3_fib
+entry:
+; CHECK: tailrecurse:
+; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ]
+; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ]
+ switch i64 %n, label %bb1 [
+; CHECK: switch i64 %n.tr, label %bb1 [
+ i64 0, label %bb2
+ i64 1, label %bb2
+ ]
+
+bb1:
+; CHECK: bb1:
+ %0 = add i64 %n, -1
+; CHECK: %0 = add i64 %n.tr, -1
+ %1 = tail call i64 @test3_fib(i64 %0) nounwind
+; CHECK: %1 = tail call i64 @test3_fib(i64 %0)
+ %2 = add i64 %n, -2
+; CHECK: %2 = add i64 %n.tr, -2
+ %3 = tail call i64 @test3_fib(i64 %2) nounwind
+; CHECK-NOT: tail call i64 @test3_fib
+ %4 = add nsw i64 %3, %1
+; CHECK: add nsw i64 %accumulator.tr, %1
+ ret i64 %4
+; CHECK: br label %tailrecurse
+
+bb2:
+; CHECK: bb2:
+ ret i64 %n
+; CHECK: ret i64 %accumulator.tr
+}
diff --git a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll b/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll
deleted file mode 100644
index 2a90cf3b22d7..000000000000
--- a/test/Transforms/TailCallElim/accum_recursion_constant_arg.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; This is a more aggressive form of accumulator recursion insertion, which
-; requires noticing that X doesn't change as we perform the tailcall. Thanks
-; go out to the anonymous users of the demo script for "suggesting"
-; optimizations that should be done. :)
-
-; RUN: opt < %s -tailcallelim -S | not grep call
-
-define i32 @mul(i32 %x, i32 %y) {
-entry:
- %tmp.1 = icmp eq i32 %y, 0 ; <i1> [#uses=1]
- br i1 %tmp.1, label %return, label %endif
-endif: ; preds = %entry
- %tmp.8 = add i32 %y, -1 ; <i32> [#uses=1]
- %tmp.5 = call i32 @mul( i32 %x, i32 %tmp.8 ) ; <i32> [#uses=1]
- %tmp.9 = add i32 %tmp.5, %x ; <i32> [#uses=1]
- ret i32 %tmp.9
-return: ; preds = %entry
- ret i32 %x
-}
-
diff --git a/test/Transforms/TailCallElim/switch.ll b/test/Transforms/TailCallElim/switch.ll
deleted file mode 100644
index 33884318b0c8..000000000000
--- a/test/Transforms/TailCallElim/switch.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: opt %s -tailcallelim -S | FileCheck %s
-
-define i64 @fib(i64 %n) nounwind readnone {
-; CHECK: @fib
-entry:
-; CHECK: tailrecurse:
-; CHECK: %accumulator.tr = phi i64 [ %n, %entry ], [ %3, %bb1 ]
-; CHECK: %n.tr = phi i64 [ %n, %entry ], [ %2, %bb1 ]
- switch i64 %n, label %bb1 [
-; CHECK: switch i64 %n.tr, label %bb1 [
- i64 0, label %bb2
- i64 1, label %bb2
- ]
-
-bb1:
-; CHECK: bb1:
- %0 = add i64 %n, -1
-; CHECK: %0 = add i64 %n.tr, -1
- %1 = tail call i64 @fib(i64 %0) nounwind
-; CHECK: %1 = tail call i64 @fib(i64 %0)
- %2 = add i64 %n, -2
-; CHECK: %2 = add i64 %n.tr, -2
- %3 = tail call i64 @fib(i64 %2) nounwind
-; CHECK-NOT: tail call i64 @fib
- %4 = add nsw i64 %3, %1
-; CHECK: add nsw i64 %accumulator.tr, %1
- ret i64 %4
-; CHECK: br label %tailrecurse
-
-bb2:
-; CHECK: bb2:
- ret i64 %n
-; CHECK: ret i64 %accumulator.tr
-}
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
index 88a565684c5d..03e99bc9bf6a 100644
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output | not grep tailduplicate
+; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output |& not grep tailduplicate
; XFAIL: *
define i32 @foo(i32 %l) nounwind {
diff --git a/test/Verifier/2010-08-07-PointerIntrinsic.ll b/test/Verifier/2010-08-07-PointerIntrinsic.ll
new file mode 100644
index 000000000000..bf5563d9c051
--- /dev/null
+++ b/test/Verifier/2010-08-07-PointerIntrinsic.ll
@@ -0,0 +1,21 @@
+; RUN: not llvm-as < %s 2> %t
+; RUN: grep {Broken module} %t
+; PR7316
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32"
+target triple = "x86-unknown-unknown"
+@aa = global [32 x i8] zeroinitializer, align 1
+@bb = global [16 x i8] zeroinitializer, align 1
+define void @x() nounwind {
+L.0:
+ %0 = getelementptr [32 x i8]* @aa, i32 0, i32 4
+ %1 = bitcast i8* %0 to [16 x i8]*
+ %2 = bitcast [16 x i8]* %1 to [0 x i8]*
+ %3 = getelementptr [16 x i8]* @bb
+ %4 = bitcast [16 x i8]* %3 to [0 x i8]*
+ call void @llvm.memcpy.i32([0 x i8]* %2, [0 x i8]* %4, i32 16, i32 1)
+ br label %return
+return:
+ ret void
+}
+declare void @llvm.memcpy.i32([0 x i8]*, [0 x i8]*, i32, i32) nounwind
diff --git a/test/lit.cfg b/test/lit.cfg
index 5e7e0e444980..f15777c99912 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -46,7 +46,16 @@ if llvm_obj_root is not None:
config.environment['PATH'] = path
# Propogate 'HOME' through the environment.
-config.environment['HOME'] = os.environ['HOME']
+if 'HOME' in os.environ:
+ config.environment['HOME'] = os.environ['HOME']
+
+# Propogate 'INCLUDE' through the environment.
+if 'INCLUDE' in os.environ:
+ config.environment['INCLUDE'] = os.environ['INCLUDE']
+
+# Propogate 'LIB' through the environment.
+if 'LIB' in os.environ:
+ config.environment['LIB'] = os.environ['LIB']
# Propogate LLVM_SRC_ROOT into the environment.
config.environment['LLVM_SRC_ROOT'] = getattr(config, 'llvm_src_root', '')
@@ -110,7 +119,7 @@ import re
site_exp = {}
# FIXME: Implement lit.site.cfg.
for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
- m = re.match('set ([^ ]+) "([^"]*)"', line)
+ m = re.match('set ([^ ]+) "(.*)"', line)
if m:
site_exp[m.group(1)] = m.group(2)
@@ -147,13 +156,13 @@ def llvm_supports_target(name):
def llvm_supports_darwin_and_target(name):
return 'darwin' in config.target_triple and llvm_supports_target(name)
-langs = set(site_exp['llvmgcc_langs'].split(','))
+langs = set([s.strip() for s in site_exp['llvmgcc_langs'].split(',')])
def llvm_gcc_supports(name):
- return name in langs
+ return name.strip() in langs
-bindings = set(site_exp['llvm_bindings'].split(','))
+bindings = set([s.strip() for s in site_exp['llvm_bindings'].split(',')])
def llvm_supports_binding(name):
- return name in bindings
+ return name.strip() in bindings
# Provide on_clone hook for reading 'dg.exp'.
import os
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index b9c77b1c8a38..7ed10e9729de 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -27,8 +27,10 @@ add_subdirectory(llvm-link)
add_subdirectory(lli)
add_subdirectory(llvm-extract)
+add_subdirectory(llvm-diff)
add_subdirectory(bugpoint)
+add_subdirectory(bugpoint-passes)
add_subdirectory(llvm-bcanalyzer)
add_subdirectory(llvm-stub)
add_subdirectory(edis)
diff --git a/tools/Makefile b/tools/Makefile
index 9bc74fe3fa4c..aa07a2b1b77f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -15,16 +15,13 @@ OPTIONAL_PARALLEL_DIRS := clang
# NOTE: The tools are organized into five groups of four consisting of one
# large and three small executables. This is done to minimize memory load
# in parallel builds. Please retain this ordering.
-
-# libEnhancedDisassembly must be built ahead of llvm-mc
-# because llvm-mc links against libEnhancedDisassembly
-DIRS := llvm-config edis llvm-mc
+DIRS := llvm-config
PARALLEL_DIRS := opt llvm-as llvm-dis \
llc llvm-ranlib llvm-ar llvm-nm \
llvm-ld llvm-prof llvm-link \
- lli llvm-extract \
+ lli llvm-extract llvm-mc \
bugpoint llvm-bcanalyzer llvm-stub \
- llvmc
+ llvmc llvm-diff
# Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS
@@ -34,6 +31,7 @@ endif
include $(LEVEL)/Makefile.config
+
# These libraries build as dynamic libraries (.dylib /.so), they can only be
# built if ENABLE_PIC is set.
ifeq ($(ENABLE_PIC),1)
@@ -46,6 +44,16 @@ ifeq ($(ENABLE_PIC),1)
else
PARALLEL_DIRS += lto
endif
+
+ PARALLEL_DIRS += bugpoint-passes
+
+ # The edis library is only supported if ARM and/or X86 are enabled, and if
+ # LLVM is being built PIC on platforms that support dylibs.
+ ifneq ($(DISABLE_EDIS),1)
+ ifneq ($(filter $(TARGETS_TO_BUILD), X86 ARM),)
+ PARALLEL_DIRS += edis
+ endif
+ endif
endif
endif
diff --git a/tools/bugpoint-passes/CMakeLists.txt b/tools/bugpoint-passes/CMakeLists.txt
new file mode 100644
index 000000000000..50109a52c249
--- /dev/null
+++ b/tools/bugpoint-passes/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_loadable_module( BugpointPasses
+ TestPasses.cpp
+ )
diff --git a/tools/bugpoint-passes/Makefile b/tools/bugpoint-passes/Makefile
new file mode 100644
index 000000000000..b4ad3e4ad3b0
--- /dev/null
+++ b/tools/bugpoint-passes/Makefile
@@ -0,0 +1,23 @@
+##===- tools/bugpoint-passes/Makefile -- -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = BugpointPasses
+LOADABLE_MODULE = 1
+USEDLIBS =
+
+# If we don't need RTTI or EH, there's no reason to export anything
+# from this plugin.
+ifneq ($(REQUIRES_RTTI), 1)
+ifneq ($(REQUIRES_EH), 1)
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/bugpoint.exports
+endif
+endif
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/bugpoint/TestPasses.cpp b/tools/bugpoint-passes/TestPasses.cpp
index 900bf632a83b..1535b0388561 100644
--- a/tools/bugpoint/TestPasses.cpp
+++ b/tools/bugpoint-passes/TestPasses.cpp
@@ -27,7 +27,7 @@ namespace {
class CrashOnCalls : public BasicBlockPass {
public:
static char ID; // Pass ID, replacement for typeid
- CrashOnCalls() : BasicBlockPass(&ID) {}
+ CrashOnCalls() : BasicBlockPass(ID) {}
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -41,12 +41,12 @@ namespace {
return false;
}
};
+}
- char CrashOnCalls::ID = 0;
- RegisterPass<CrashOnCalls>
+char CrashOnCalls::ID = 0;
+static RegisterPass<CrashOnCalls>
X("bugpoint-crashcalls",
"BugPoint Test Pass - Intentionally crash on CallInsts");
-}
namespace {
/// DeleteCalls - This pass is used to test bugpoint. It intentionally
@@ -54,7 +54,7 @@ namespace {
class DeleteCalls : public BasicBlockPass {
public:
static char ID; // Pass ID, replacement for typeid
- DeleteCalls() : BasicBlockPass(&ID) {}
+ DeleteCalls() : BasicBlockPass(ID) {}
private:
bool runOnBasicBlock(BasicBlock &BB) {
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
@@ -67,9 +67,9 @@ namespace {
return false;
}
};
+}
- char DeleteCalls::ID = 0;
- RegisterPass<DeleteCalls>
+char DeleteCalls::ID = 0;
+static RegisterPass<DeleteCalls>
Y("bugpoint-deletecalls",
"BugPoint Test Pass - Intentionally 'misoptimize' CallInsts");
-}
diff --git a/tools/bugpoint-passes/bugpoint.exports b/tools/bugpoint-passes/bugpoint.exports
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/bugpoint-passes/bugpoint.exports
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 45a0d4dd17a3..6966671f9cb2 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -56,22 +56,22 @@ void BugDriver::setNewProgram(Module *M) {
/// getPassesString - Turn a list of passes into a string which indicates the
/// command line options that must be passed to add the passes.
///
-std::string llvm::getPassesString(const std::vector<const PassInfo*> &Passes) {
+std::string llvm::getPassesString(const std::vector<std::string> &Passes) {
std::string Result;
for (unsigned i = 0, e = Passes.size(); i != e; ++i) {
if (i) Result += " ";
Result += "-";
- Result += Passes[i]->getPassArgument();
+ Result += Passes[i];
}
return Result;
}
-BugDriver::BugDriver(const char *toolname, bool as_child, bool find_bugs,
+BugDriver::BugDriver(const char *toolname, bool find_bugs,
unsigned timeout, unsigned memlimit, bool use_valgrind,
LLVMContext& ctxt)
: Context(ctxt), ToolName(toolname), ReferenceOutputFile(OutputFile),
Program(0), Interpreter(0), SafeInterpreter(0), gcc(0),
- run_as_child(as_child), run_find_bugs(find_bugs), Timeout(timeout),
+ run_find_bugs(find_bugs), Timeout(timeout),
MemoryLimit(memlimit), UseValgrind(use_valgrind) {}
BugDriver::~BugDriver() {
@@ -119,15 +119,13 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
Program = ParseInputFile(Filenames[0], Context);
if (Program == 0) return true;
- if (!run_as_child)
- outs() << "Read input file : '" << Filenames[0] << "'\n";
+ outs() << "Read input file : '" << Filenames[0] << "'\n";
for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
std::auto_ptr<Module> M(ParseInputFile(Filenames[i], Context));
if (M.get() == 0) return true;
- if (!run_as_child)
- outs() << "Linking in input file: '" << Filenames[i] << "'\n";
+ outs() << "Linking in input file: '" << Filenames[i] << "'\n";
std::string ErrorMessage;
if (Linker::LinkModules(Program, M.get(), &ErrorMessage)) {
errs() << ToolName << ": error linking in '" << Filenames[i] << "': "
@@ -136,8 +134,7 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
}
}
- if (!run_as_child)
- outs() << "*** All input ok\n";
+ outs() << "*** All input ok\n";
// All input files read successfully!
return false;
@@ -149,14 +146,6 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
/// variables are set up from command line arguments.
///
bool BugDriver::run(std::string &ErrMsg) {
- // The first thing to do is determine if we're running as a child. If we are,
- // then what to do is very narrow. This form of invocation is only called
- // from the runPasses method to actually run those passes in a child process.
- if (run_as_child) {
- // Execute the passes
- return runPassesAsChild(PassesToRun);
- }
-
if (run_find_bugs) {
// Rearrange the passes and apply them to the program. Repeat this process
// until the user kills the program or we find a bug.
@@ -172,7 +161,7 @@ bool BugDriver::run(std::string &ErrMsg) {
// miscompilation.
if (!PassesToRun.empty()) {
outs() << "Running selected passes on program to test for crash: ";
- if (runPasses(PassesToRun))
+ if (runPasses(Program, PassesToRun))
return debugOptimizerCrash();
}
@@ -211,7 +200,7 @@ bool BugDriver::run(std::string &ErrMsg) {
// matches, then we assume there is a miscompilation bug and try to
// diagnose it.
outs() << "*** Checking the code generator...\n";
- bool Diff = diffProgram("", "", false, &Error);
+ bool Diff = diffProgram(Program, "", "", false, &Error);
if (!Error.empty()) {
errs() << Error;
return debugCodeGeneratorCrash(ErrMsg);
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index 4f6bae5d5abb..e48806aee6bc 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -47,11 +47,10 @@ class BugDriver {
const char *ToolName; // argv[0] of bugpoint
std::string ReferenceOutputFile; // Name of `good' output file
Module *Program; // The raw program, linked together
- std::vector<const PassInfo*> PassesToRun;
+ std::vector<std::string> PassesToRun;
AbstractInterpreter *Interpreter; // How to run the program
AbstractInterpreter *SafeInterpreter; // To generate reference output, etc.
GCC *gcc;
- bool run_as_child;
bool run_find_bugs;
unsigned Timeout;
unsigned MemoryLimit;
@@ -62,25 +61,24 @@ class BugDriver {
friend class ReduceMisCodegenFunctions;
public:
- BugDriver(const char *toolname, bool as_child, bool find_bugs,
+ BugDriver(const char *toolname, bool find_bugs,
unsigned timeout, unsigned memlimit, bool use_valgrind,
LLVMContext& ctxt);
~BugDriver();
const char *getToolName() const { return ToolName; }
- LLVMContext& getContext() { return Context; }
+ LLVMContext& getContext() const { return Context; }
// Set up methods... these methods are used to copy information about the
// command line arguments into instance variables of BugDriver.
//
bool addSources(const std::vector<std::string> &FileNames);
- template<class It>
- void addPasses(It I, It E) { PassesToRun.insert(PassesToRun.end(), I, E); }
- void setPassesToRun(const std::vector<const PassInfo*> &PTR) {
+ void addPass(std::string p) { PassesToRun.push_back(p); }
+ void setPassesToRun(const std::vector<std::string> &PTR) {
PassesToRun = PTR;
}
- const std::vector<const PassInfo*> &getPassesToRun() const {
+ const std::vector<std::string> &getPassesToRun() const {
return PassesToRun;
}
@@ -132,12 +130,8 @@ public:
/// runPasses - Run all of the passes in the "PassesToRun" list, discard the
/// output, and return true if any of the passes crashed.
- bool runPasses(Module *M = 0) {
- if (M == 0) M = Program;
- std::swap(M, Program);
- bool Result = runPasses(PassesToRun);
- std::swap(M, Program);
- return Result;
+ bool runPasses(Module *M) const {
+ return runPasses(M, PassesToRun);
}
Module *getProgram() const { return Program; }
@@ -169,23 +163,26 @@ public:
/// setting Error if an error occurs. This is used for code generation
/// crash testing.
///
- void compileProgram(Module *M, std::string *Error);
+ void compileProgram(Module *M, std::string *Error) const;
/// executeProgram - This method runs "Program", capturing the output of the
/// program to a file. A recommended filename may be optionally specified.
///
- std::string executeProgram(std::string OutputFilename,
+ std::string executeProgram(const Module *Program,
+ std::string OutputFilename,
std::string Bitcode,
const std::string &SharedObjects,
AbstractInterpreter *AI,
- std::string *Error);
+ std::string *Error) const;
/// executeProgramSafely - Used to create reference output with the "safe"
/// backend, if reference output is not provided. If there is a problem with
/// the code generator (e.g., llc crashes), this will return false and set
/// Error.
///
- std::string executeProgramSafely(std::string OutputFile, std::string *Error);
+ std::string executeProgramSafely(const Module *Program,
+ std::string OutputFile,
+ std::string *Error) const;
/// createReferenceFile - calls compileProgram and then records the output
/// into ReferenceOutputFile. Returns true if reference file created, false
@@ -200,23 +197,24 @@ public:
/// is different, 1 is returned. If there is a problem with the code
/// generator (e.g., llc crashes), this will return -1 and set Error.
///
- bool diffProgram(const std::string &BitcodeFile = "",
+ bool diffProgram(const Module *Program,
+ const std::string &BitcodeFile = "",
const std::string &SharedObj = "",
bool RemoveBitcode = false,
- std::string *Error = 0);
+ std::string *Error = 0) const;
- /// EmitProgressBitcode - This function is used to output the current Program
- /// to a file named "bugpoint-ID.bc".
+ /// EmitProgressBitcode - This function is used to output M to a file named
+ /// "bugpoint-ID.bc".
///
- void EmitProgressBitcode(const std::string &ID, bool NoFlyer = false);
+ void EmitProgressBitcode(const Module *M, const std::string &ID,
+ bool NoFlyer = false) const;
/// deleteInstructionFromProgram - This method clones the current Program and
/// deletes the specified instruction from the cloned module. It then runs a
/// series of cleanup passes (ADCE and SimplifyCFG) to eliminate any code
/// which depends on the value. The modified module is then returned.
///
- Module *deleteInstructionFromProgram(const Instruction *I, unsigned Simp)
- const;
+ Module *deleteInstructionFromProgram(const Instruction *I, unsigned Simp);
/// performFinalCleanups - This method clones the current Program and performs
/// a series of cleanups intended to get rid of extra cruft on the module. If
@@ -243,7 +241,7 @@ public:
/// failure. If AutoDebugCrashes is set to true, then bugpoint will
/// automatically attempt to track down a crashing pass if one exists, and
/// this method will never return null.
- Module *runPassesOn(Module *M, const std::vector<const PassInfo*> &Passes,
+ Module *runPassesOn(Module *M, const std::vector<std::string> &Passes,
bool AutoDebugCrashes = false, unsigned NumExtraArgs = 0,
const char * const *ExtraArgs = NULL);
@@ -256,7 +254,8 @@ public:
/// or failed, unless Quiet is set. ExtraArgs specifies additional arguments
/// to pass to the child bugpoint instance.
///
- bool runPasses(const std::vector<const PassInfo*> &PassesToRun,
+ bool runPasses(Module *Program,
+ const std::vector<std::string> &PassesToRun,
std::string &OutputFilename, bool DeleteOutput = false,
bool Quiet = false, unsigned NumExtraArgs = 0,
const char * const *ExtraArgs = NULL) const;
@@ -268,28 +267,26 @@ public:
/// If the passes did not compile correctly, output the command required to
/// recreate the failure. This returns true if a compiler error is found.
///
- bool runManyPasses(const std::vector<const PassInfo*> &AllPasses,
+ bool runManyPasses(const std::vector<std::string> &AllPasses,
std::string &ErrMsg);
/// writeProgramToFile - This writes the current "Program" to the named
/// bitcode file. If an error occurs, true is returned.
///
- bool writeProgramToFile(const std::string &Filename, Module *M = 0) const;
+ bool writeProgramToFile(const std::string &Filename, const Module *M) const;
private:
/// runPasses - Just like the method above, but this just returns true or
/// false indicating whether or not the optimizer crashed on the specified
/// input (true = crashed).
///
- bool runPasses(const std::vector<const PassInfo*> &PassesToRun,
+ bool runPasses(Module *M,
+ const std::vector<std::string> &PassesToRun,
bool DeleteOutput = true) const {
std::string Filename;
- return runPasses(PassesToRun, Filename, DeleteOutput);
+ return runPasses(M, PassesToRun, Filename, DeleteOutput);
}
- /// runAsChild - The actual "runPasses" guts that runs in a child process.
- int runPassesAsChild(const std::vector<const PassInfo*> &PassesToRun);
-
/// initializeExecutionEnvironment - This method is used to set up the
/// environment for executing LLVM programs.
///
@@ -306,7 +303,7 @@ Module *ParseInputFile(const std::string &InputFilename,
/// getPassesString - Turn a list of passes into a string which indicates the
/// command line options that must be passed to add the passes.
///
-std::string getPassesString(const std::vector<const PassInfo*> &Passes);
+std::string getPassesString(const std::vector<std::string> &Passes);
/// PrintFunctionList - prints out list of problematic functions
///
diff --git a/tools/bugpoint/CMakeLists.txt b/tools/bugpoint/CMakeLists.txt
index 34b759fd8794..e06feb100312 100644
--- a/tools/bugpoint/CMakeLists.txt
+++ b/tools/bugpoint/CMakeLists.txt
@@ -9,7 +9,6 @@ add_llvm_tool(bugpoint
FindBugs.cpp
Miscompilation.cpp
OptimizerDriver.cpp
- TestPasses.cpp
ToolRunner.cpp
bugpoint.cpp
)
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index 2d0631cdad90..57dc1c830c10 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -43,7 +43,7 @@ namespace {
}
namespace llvm {
- class ReducePassList : public ListReducer<const PassInfo*> {
+ class ReducePassList : public ListReducer<std::string> {
BugDriver &BD;
public:
ReducePassList(BugDriver &bd) : BD(bd) {}
@@ -52,15 +52,15 @@ namespace llvm {
// running the "Kept" passes fail when run on the output of the "removed"
// passes. If we return true, we update the current module of bugpoint.
//
- virtual TestResult doTest(std::vector<const PassInfo*> &Removed,
- std::vector<const PassInfo*> &Kept,
+ virtual TestResult doTest(std::vector<std::string> &Removed,
+ std::vector<std::string> &Kept,
std::string &Error);
};
}
ReducePassList::TestResult
-ReducePassList::doTest(std::vector<const PassInfo*> &Prefix,
- std::vector<const PassInfo*> &Suffix,
+ReducePassList::doTest(std::vector<std::string> &Prefix,
+ std::vector<std::string> &Suffix,
std::string &Error) {
sys::Path PrefixOutput;
Module *OrigProgram = 0;
@@ -68,7 +68,7 @@ ReducePassList::doTest(std::vector<const PassInfo*> &Prefix,
outs() << "Checking to see if these passes crash: "
<< getPassesString(Prefix) << ": ";
std::string PfxOutput;
- if (BD.runPasses(Prefix, PfxOutput))
+ if (BD.runPasses(BD.getProgram(), Prefix, PfxOutput))
return KeepPrefix;
PrefixOutput.set(PfxOutput);
@@ -86,7 +86,7 @@ ReducePassList::doTest(std::vector<const PassInfo*> &Prefix,
outs() << "Checking to see if these passes crash: "
<< getPassesString(Suffix) << ": ";
- if (BD.runPasses(Suffix)) {
+ if (BD.runPasses(BD.getProgram(), Suffix)) {
delete OrigProgram; // The suffix crashes alone...
return KeepSuffix;
}
@@ -106,10 +106,10 @@ namespace {
///
class ReduceCrashingGlobalVariables : public ListReducer<GlobalVariable*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *);
+ bool (*TestFn)(const BugDriver &, Module *);
public:
ReduceCrashingGlobalVariables(BugDriver &bd,
- bool (*testFn)(BugDriver &, Module *))
+ bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
virtual TestResult doTest(std::vector<GlobalVariable*> &Prefix,
@@ -176,10 +176,10 @@ namespace llvm {
///
class ReduceCrashingFunctions : public ListReducer<Function*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *);
+ bool (*TestFn)(const BugDriver &, Module *);
public:
ReduceCrashingFunctions(BugDriver &bd,
- bool (*testFn)(BugDriver &, Module *))
+ bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
virtual TestResult doTest(std::vector<Function*> &Prefix,
@@ -249,9 +249,10 @@ namespace {
///
class ReduceCrashingBlocks : public ListReducer<const BasicBlock*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *);
+ bool (*TestFn)(const BugDriver &, Module *);
public:
- ReduceCrashingBlocks(BugDriver &bd, bool (*testFn)(BugDriver &, Module *))
+ ReduceCrashingBlocks(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
virtual TestResult doTest(std::vector<const BasicBlock*> &Prefix,
@@ -311,17 +312,24 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
// a "persistent mapping" by turning basic blocks into <function, name> pairs.
// This won't work well if blocks are unnamed, but that is just the risk we
// have to take.
- std::vector<std::pair<Function*, std::string> > BlockInfo;
+ std::vector<std::pair<std::string, std::string> > BlockInfo;
for (SmallPtrSet<BasicBlock*, 8>::iterator I = Blocks.begin(),
E = Blocks.end(); I != E; ++I)
- BlockInfo.push_back(std::make_pair((*I)->getParent(), (*I)->getName()));
+ BlockInfo.push_back(std::make_pair((*I)->getParent()->getName(),
+ (*I)->getName()));
// Now run the CFG simplify pass on the function...
- PassManager Passes;
- Passes.add(createCFGSimplificationPass());
- Passes.add(createVerifierPass());
- Passes.run(*M);
+ std::vector<std::string> Passes;
+ Passes.push_back("simplifycfg");
+ Passes.push_back("verify");
+ Module *New = BD.runPassesOn(M, Passes);
+ delete M;
+ if (!New) {
+ errs() << "simplifycfg failed!\n";
+ exit(1);
+ }
+ M = New;
// Try running on the hacked up program...
if (TestFn(BD, M)) {
@@ -330,8 +338,10 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
// Make sure to use basic block pointers that point into the now-current
// module, and that they don't include any deleted blocks.
BBs.clear();
+ const ValueSymbolTable &GST = M->getValueSymbolTable();
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
- ValueSymbolTable &ST = BlockInfo[i].first->getValueSymbolTable();
+ Function *F = cast<Function>(GST.lookup(BlockInfo[i].first));
+ ValueSymbolTable &ST = F->getValueSymbolTable();
Value* V = ST.lookup(BlockInfo[i].second);
if (V && V->getType() == Type::getLabelTy(V->getContext()))
BBs.push_back(cast<BasicBlock>(V));
@@ -348,10 +358,10 @@ namespace {
///
class ReduceCrashingInstructions : public ListReducer<const Instruction*> {
BugDriver &BD;
- bool (*TestFn)(BugDriver &, Module *);
+ bool (*TestFn)(const BugDriver &, Module *);
public:
- ReduceCrashingInstructions(BugDriver &bd, bool (*testFn)(BugDriver &,
- Module *))
+ ReduceCrashingInstructions(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
virtual TestResult doTest(std::vector<const Instruction*> &Prefix,
@@ -422,7 +432,8 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
/// DebugACrash - Given a predicate that determines whether a component crashes
/// on a program, try to destructively reduce the program while still keeping
/// the predicate true.
-static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *),
+static bool DebugACrash(BugDriver &BD,
+ bool (*TestFn)(const BugDriver &, Module *),
std::string &Error) {
// See if we can get away with nuking some of the global variable initializers
// in the program...
@@ -471,7 +482,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *),
return true;
if (GVs.size() < OldSize)
- BD.EmitProgressBitcode("reduced-global-variables");
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-global-variables");
}
}
}
@@ -492,7 +503,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *),
ReduceCrashingFunctions(BD, TestFn).reduceList(Functions, Error);
if (Functions.size() < OldSize)
- BD.EmitProgressBitcode("reduced-function");
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-function");
}
// Attempt to delete entire basic blocks at a time to speed up
@@ -509,7 +520,7 @@ static bool DebugACrash(BugDriver &BD, bool (*TestFn)(BugDriver &, Module *),
unsigned OldSize = Blocks.size();
ReduceCrashingBlocks(BD, TestFn).reduceList(Blocks, Error);
if (Blocks.size() < OldSize)
- BD.EmitProgressBitcode("reduced-blocks");
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-blocks");
}
// Attempt to delete instructions using bisection. This should help out nasty
@@ -602,12 +613,12 @@ ExitLoops:
}
}
- BD.EmitProgressBitcode("reduced-simplified");
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-simplified");
return false;
}
-static bool TestForOptimizerCrash(BugDriver &BD, Module *M) {
+static bool TestForOptimizerCrash(const BugDriver &BD, Module *M) {
return BD.runPasses(M);
}
@@ -628,14 +639,14 @@ bool BugDriver::debugOptimizerCrash(const std::string &ID) {
<< (PassesToRun.size() == 1 ? ": " : "es: ")
<< getPassesString(PassesToRun) << '\n';
- EmitProgressBitcode(ID);
+ EmitProgressBitcode(Program, ID);
bool Success = DebugACrash(*this, TestForOptimizerCrash, Error);
assert(Error.empty());
return Success;
}
-static bool TestForCodeGenCrash(BugDriver &BD, Module *M) {
+static bool TestForCodeGenCrash(const BugDriver &BD, Module *M) {
std::string Error;
BD.compileProgram(M, &Error);
if (!Error.empty()) {
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 57f12d5af824..731248493247 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -293,7 +293,7 @@ bool BugDriver::initializeExecutionEnvironment() {
/// setting Error if an error occurs. This is used for code generation
/// crash testing.
///
-void BugDriver::compileProgram(Module *M, std::string *Error) {
+void BugDriver::compileProgram(Module *M, std::string *Error) const {
// Emit the program to a bitcode file...
sys::Path BitcodeFile (OutputPrefix + "-test-program.bc");
std::string ErrMsg;
@@ -320,11 +320,12 @@ void BugDriver::compileProgram(Module *M, std::string *Error) {
/// program to a file, returning the filename of the file. A recommended
/// filename may be optionally specified.
///
-std::string BugDriver::executeProgram(std::string OutputFile,
+std::string BugDriver::executeProgram(const Module *Program,
+ std::string OutputFile,
std::string BitcodeFile,
const std::string &SharedObj,
AbstractInterpreter *AI,
- std::string *Error) {
+ std::string *Error) const {
if (AI == 0) AI = Interpreter;
assert(AI && "Interpreter should have been created already!");
bool CreatedBitcode = false;
@@ -399,9 +400,10 @@ std::string BugDriver::executeProgram(std::string OutputFile,
/// executeProgramSafely - Used to create reference output with the "safe"
/// backend, if reference output is not provided.
///
-std::string BugDriver::executeProgramSafely(std::string OutputFile,
- std::string *Error) {
- return executeProgram(OutputFile, "", "", SafeInterpreter, Error);
+std::string BugDriver::executeProgramSafely(const Module *Program,
+ std::string OutputFile,
+ std::string *Error) const {
+ return executeProgram(Program, OutputFile, "", "", SafeInterpreter, Error);
}
std::string BugDriver::compileSharedObject(const std::string &BitcodeFile,
@@ -440,7 +442,7 @@ bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
if (!Error.empty())
return false;
- ReferenceOutputFile = executeProgramSafely(Filename, &Error);
+ ReferenceOutputFile = executeProgramSafely(Program, Filename, &Error);
if (!Error.empty()) {
errs() << Error;
if (Interpreter != SafeInterpreter) {
@@ -460,12 +462,14 @@ bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
/// is different, 1 is returned. If there is a problem with the code
/// generator (e.g., llc crashes), this will return -1 and set Error.
///
-bool BugDriver::diffProgram(const std::string &BitcodeFile,
+bool BugDriver::diffProgram(const Module *Program,
+ const std::string &BitcodeFile,
const std::string &SharedObject,
bool RemoveBitcode,
- std::string *ErrMsg) {
+ std::string *ErrMsg) const {
// Execute the program, generating an output file...
- sys::Path Output(executeProgram("", BitcodeFile, SharedObject, 0, ErrMsg));
+ sys::Path Output(executeProgram(Program, "", BitcodeFile, SharedObject, 0,
+ ErrMsg));
if (!ErrMsg->empty())
return false;
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index d5611b58ae20..524f130ba751 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -55,13 +55,14 @@ namespace {
/// depends on the value. The modified module is then returned.
///
Module *BugDriver::deleteInstructionFromProgram(const Instruction *I,
- unsigned Simplification) const {
- Module *Result = CloneModule(Program);
+ unsigned Simplification) {
+ // FIXME, use vmap?
+ Module *Clone = CloneModule(Program);
const BasicBlock *PBB = I->getParent();
const Function *PF = PBB->getParent();
- Module::iterator RFI = Result->begin(); // Get iterator to corresponding fn
+ Module::iterator RFI = Clone->begin(); // Get iterator to corresponding fn
std::advance(RFI, std::distance(PF->getParent()->begin(),
Module::const_iterator(PF)));
@@ -79,30 +80,23 @@ Module *BugDriver::deleteInstructionFromProgram(const Instruction *I,
// Remove the instruction from the program.
TheInst->getParent()->getInstList().erase(TheInst);
-
- //writeProgramToFile("current.bc", Result);
-
// Spiff up the output a little bit.
- PassManager Passes;
- // Make sure that the appropriate target data is always used...
- Passes.add(new TargetData(Result));
+ std::vector<std::string> Passes;
- /// FIXME: If this used runPasses() like the methods below, we could get rid
- /// of the -disable-* options!
+ /// Can we get rid of the -disable-* options?
if (Simplification > 1 && !NoDCE)
- Passes.add(createDeadCodeEliminationPass());
+ Passes.push_back("dce");
if (Simplification && !DisableSimplifyCFG)
- Passes.add(createCFGSimplificationPass()); // Delete dead control flow
-
- Passes.add(createVerifierPass());
- Passes.run(*Result);
- return Result;
-}
-
-static const PassInfo *getPI(Pass *P) {
- const PassInfo *PI = P->getPassInfo();
- delete P;
- return PI;
+ Passes.push_back("simplifycfg"); // Delete dead control flow
+
+ Passes.push_back("verify");
+ Module *New = runPassesOn(Clone, Passes);
+ delete Clone;
+ if (!New) {
+ errs() << "Instruction removal failed. Sorry. :( Please report a bug!\n";
+ exit(1);
+ }
+ return New;
}
/// performFinalCleanups - This method clones the current Program and performs
@@ -114,15 +108,15 @@ Module *BugDriver::performFinalCleanups(Module *M, bool MayModifySemantics) {
for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
I->setLinkage(GlobalValue::ExternalLinkage);
- std::vector<const PassInfo*> CleanupPasses;
- CleanupPasses.push_back(getPI(createGlobalDCEPass()));
+ std::vector<std::string> CleanupPasses;
+ CleanupPasses.push_back("globaldce");
if (MayModifySemantics)
- CleanupPasses.push_back(getPI(createDeadArgHackingPass()));
+ CleanupPasses.push_back("deadarghaX0r");
else
- CleanupPasses.push_back(getPI(createDeadArgEliminationPass()));
+ CleanupPasses.push_back("deadargelim");
- CleanupPasses.push_back(getPI(createDeadTypeEliminationPass()));
+ CleanupPasses.push_back("deadtypeelim");
Module *New = runPassesOn(M, CleanupPasses);
if (New == 0) {
@@ -138,16 +132,14 @@ Module *BugDriver::performFinalCleanups(Module *M, bool MayModifySemantics) {
/// function. This returns null if there are no extractable loops in the
/// program or if the loop extractor crashes.
Module *BugDriver::ExtractLoop(Module *M) {
- std::vector<const PassInfo*> LoopExtractPasses;
- LoopExtractPasses.push_back(getPI(createSingleLoopExtractorPass()));
+ std::vector<std::string> LoopExtractPasses;
+ LoopExtractPasses.push_back("loop-extract-single");
Module *NewM = runPassesOn(M, LoopExtractPasses);
if (NewM == 0) {
- Module *Old = swapProgramIn(M);
outs() << "*** Loop extraction failed: ";
- EmitProgressBitcode("loopextraction", true);
+ EmitProgressBitcode(M, "loopextraction", true);
outs() << "*** Sorry. :( Please report a bug!\n";
- swapProgramIn(Old);
return 0;
}
@@ -201,7 +193,7 @@ static Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
/// static ctors/dtors, we need to add an llvm.global_[cd]tors global to M2, and
/// prune appropriate entries out of M1s list.
static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
- ValueMap<const Value*, Value*> VMap) {
+ ValueMap<const Value*, Value*> &VMap) {
GlobalVariable *GV = M1->getNamedGlobal(GlobalName);
if (!GV || GV->isDeclaration() || GV->hasLocalLinkage() ||
!GV->use_empty()) return;
@@ -327,22 +319,18 @@ Module *BugDriver::ExtractMappedBlocksFromModule(const
if (uniqueFilename.createTemporaryFileOnDisk(true, &ErrMsg)) {
outs() << "*** Basic Block extraction failed!\n";
errs() << "Error creating temporary file: " << ErrMsg << "\n";
- M = swapProgramIn(M);
- EmitProgressBitcode("basicblockextractfail", true);
- swapProgramIn(M);
+ EmitProgressBitcode(M, "basicblockextractfail", true);
return 0;
}
sys::RemoveFileOnSignal(uniqueFilename);
std::string ErrorInfo;
- raw_fd_ostream BlocksToNotExtractFile(uniqueFilename.c_str(), ErrorInfo);
+ tool_output_file BlocksToNotExtractFile(uniqueFilename.c_str(), ErrorInfo);
if (!ErrorInfo.empty()) {
outs() << "*** Basic Block extraction failed!\n";
errs() << "Error writing list of blocks to not extract: " << ErrorInfo
<< "\n";
- M = swapProgramIn(M);
- EmitProgressBitcode("basicblockextractfail", true);
- swapProgramIn(M);
+ EmitProgressBitcode(M, "basicblockextractfail", true);
return 0;
}
for (std::vector<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
@@ -351,26 +339,31 @@ Module *BugDriver::ExtractMappedBlocksFromModule(const
// If the BB doesn't have a name, give it one so we have something to key
// off of.
if (!BB->hasName()) BB->setName("tmpbb");
- BlocksToNotExtractFile << BB->getParent()->getNameStr() << " "
- << BB->getName() << "\n";
+ BlocksToNotExtractFile.os() << BB->getParent()->getNameStr() << " "
+ << BB->getName() << "\n";
+ }
+ BlocksToNotExtractFile.os().close();
+ if (BlocksToNotExtractFile.os().has_error()) {
+ errs() << "Error writing list of blocks to not extract: " << ErrorInfo
+ << "\n";
+ EmitProgressBitcode(M, "basicblockextractfail", true);
+ BlocksToNotExtractFile.os().clear_error();
+ return 0;
}
- BlocksToNotExtractFile.close();
+ BlocksToNotExtractFile.keep();
std::string uniqueFN = "--extract-blocks-file=" + uniqueFilename.str();
const char *ExtraArg = uniqueFN.c_str();
- std::vector<const PassInfo*> PI;
- std::vector<BasicBlock *> EmptyBBs; // This parameter is ignored.
- PI.push_back(getPI(createBlockExtractorPass(EmptyBBs)));
+ std::vector<std::string> PI;
+ PI.push_back("extract-blocks");
Module *Ret = runPassesOn(M, PI, false, 1, &ExtraArg);
uniqueFilename.eraseFromDisk(); // Free disk space
if (Ret == 0) {
outs() << "*** Basic Block extraction failed, please report a bug!\n";
- M = swapProgramIn(M);
- EmitProgressBitcode("basicblockextractfail", true);
- swapProgramIn(M);
+ EmitProgressBitcode(M, "basicblockextractfail", true);
}
return Ret;
}
diff --git a/tools/bugpoint/FindBugs.cpp b/tools/bugpoint/FindBugs.cpp
index 224c71747a6f..a291f9fb0f99 100644
--- a/tools/bugpoint/FindBugs.cpp
+++ b/tools/bugpoint/FindBugs.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
/// If the passes did not compile correctly, output the command required to
/// recreate the failure. This returns true if a compiler error is found.
///
-bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses,
+bool BugDriver::runManyPasses(const std::vector<std::string> &AllPasses,
std::string &ErrMsg) {
setPassesToRun(AllPasses);
outs() << "Starting bug finding procedure...\n\n";
@@ -58,11 +58,11 @@ bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses,
//
outs() << "Running selected passes on program to test for crash: ";
for(int i = 0, e = PassesToRun.size(); i != e; i++) {
- outs() << "-" << PassesToRun[i]->getPassArgument() << " ";
+ outs() << "-" << PassesToRun[i] << " ";
}
std::string Filename;
- if(runPasses(PassesToRun, Filename, false)) {
+ if(runPasses(Program, PassesToRun, Filename, false)) {
outs() << "\n";
outs() << "Optimizer passes caused failure!\n\n";
debugOptimizerCrash();
@@ -89,7 +89,7 @@ bool BugDriver::runManyPasses(const std::vector<const PassInfo*> &AllPasses,
// output (created above).
//
outs() << "*** Checking if passes caused miscompliation:\n";
- bool Diff = diffProgram(Filename, "", false, &Error);
+ bool Diff = diffProgram(Program, Filename, "", false, &Error);
if (Error.empty() && Diff) {
outs() << "\n*** diffProgram returned true!\n";
debugMiscompilation(&Error);
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 47ac3c5c4d3a..3f2b6968718b 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -43,13 +43,13 @@ namespace {
cl::desc("Don't extract blocks when searching for miscompilations"),
cl::init(false));
- class ReduceMiscompilingPasses : public ListReducer<const PassInfo*> {
+ class ReduceMiscompilingPasses : public ListReducer<std::string> {
BugDriver &BD;
public:
ReduceMiscompilingPasses(BugDriver &bd) : BD(bd) {}
- virtual TestResult doTest(std::vector<const PassInfo*> &Prefix,
- std::vector<const PassInfo*> &Suffix,
+ virtual TestResult doTest(std::vector<std::string> &Prefix,
+ std::vector<std::string> &Suffix,
std::string &Error);
};
}
@@ -58,8 +58,8 @@ namespace {
/// group, see if they still break the program.
///
ReduceMiscompilingPasses::TestResult
-ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
- std::vector<const PassInfo*> &Suffix,
+ReduceMiscompilingPasses::doTest(std::vector<std::string> &Prefix,
+ std::vector<std::string> &Suffix,
std::string &Error) {
// First, run the program with just the Suffix passes. If it is still broken
// with JUST the kept passes, discard the prefix passes.
@@ -67,17 +67,18 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
<< "' compiles correctly: ";
std::string BitcodeResult;
- if (BD.runPasses(Suffix, BitcodeResult, false/*delete*/, true/*quiet*/)) {
+ if (BD.runPasses(BD.getProgram(), Suffix, BitcodeResult, false/*delete*/,
+ true/*quiet*/)) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
BD.setPassesToRun(Suffix);
- BD.EmitProgressBitcode("pass-error", false);
+ BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
exit(BD.debugOptimizerCrash());
}
// Check to see if the finished program matches the reference output...
- bool Diff = BD.diffProgram(BitcodeResult, "", true /*delete bitcode*/,
- &Error);
+ bool Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "",
+ true /*delete bitcode*/, &Error);
if (!Error.empty())
return InternalError;
if (Diff) {
@@ -104,16 +105,17 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
// kept passes, we can update our bitcode file to include the result of the
// prefix passes, then discard the prefix passes.
//
- if (BD.runPasses(Prefix, BitcodeResult, false/*delete*/, true/*quiet*/)) {
+ if (BD.runPasses(BD.getProgram(), Prefix, BitcodeResult, false/*delete*/,
+ true/*quiet*/)) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
BD.setPassesToRun(Prefix);
- BD.EmitProgressBitcode("pass-error", false);
+ BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
exit(BD.debugOptimizerCrash());
}
// If the prefix maintains the predicate by itself, only keep the prefix!
- Diff = BD.diffProgram(BitcodeResult, "", false, &Error);
+ Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "", false, &Error);
if (!Error.empty())
return InternalError;
if (Diff) {
@@ -144,16 +146,18 @@ ReduceMiscompilingPasses::doTest(std::vector<const PassInfo*> &Prefix,
<< getPassesString(Prefix) << "' passes: ";
OwningPtr<Module> OriginalInput(BD.swapProgramIn(PrefixOutput.take()));
- if (BD.runPasses(Suffix, BitcodeResult, false/*delete*/, true/*quiet*/)) {
+ if (BD.runPasses(BD.getProgram(), Suffix, BitcodeResult, false/*delete*/,
+ true/*quiet*/)) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
BD.setPassesToRun(Suffix);
- BD.EmitProgressBitcode("pass-error", false);
+ BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
exit(BD.debugOptimizerCrash());
}
// Run the result...
- Diff = BD.diffProgram(BitcodeResult, "", true /*delete bitcode*/, &Error);
+ Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "",
+ true /*delete bitcode*/, &Error);
if (!Error.empty())
return InternalError;
if (Diff) {
@@ -198,18 +202,20 @@ namespace {
return NoFailure;
}
- int TestFuncs(const std::vector<Function*> &Prefix, std::string &Error);
+ bool TestFuncs(const std::vector<Function*> &Prefix, std::string &Error);
};
}
/// TestMergedProgram - Given two modules, link them together and run the
-/// program, checking to see if the program matches the diff. If the diff
-/// matches, return false, otherwise return true. If the DeleteInputs argument
-/// is set to true then this function deletes both input modules before it
-/// returns.
+/// program, checking to see if the program matches the diff. If there is
+/// an error, return NULL. If not, return the merged module. The Broken argument
+/// will be set to true if the output is different. If the DeleteInputs
+/// argument is set to true then this function deletes both input
+/// modules before it returns.
///
-static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
- bool DeleteInputs, std::string &Error) {
+static Module *TestMergedProgram(const BugDriver &BD, Module *M1, Module *M2,
+ bool DeleteInputs, std::string &Error,
+ bool &Broken) {
// Link the two portions of the program back to together.
std::string ErrorMsg;
if (!DeleteInputs) {
@@ -223,24 +229,22 @@ static bool TestMergedProgram(BugDriver &BD, Module *M1, Module *M2,
}
delete M2; // We are done with this module.
- OwningPtr<Module> OldProgram(BD.swapProgramIn(M1));
-
- // Execute the program. If it does not match the expected output, we must
- // return true.
- bool Broken = BD.diffProgram("", "", false, &Error);
+ // Execute the program.
+ Broken = BD.diffProgram(M1, "", "", false, &Error);
if (!Error.empty()) {
- // Delete the linked module & restore the original
- delete BD.swapProgramIn(OldProgram.take());
+ // Delete the linked module
+ delete M1;
+ return NULL;
}
- return Broken;
+ return M1;
}
/// TestFuncs - split functions in a Module into two groups: those that are
/// under consideration for miscompilation vs. those that are not, and test
/// accordingly. Each group of functions becomes a separate Module.
///
-int ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
- std::string &Error) {
+bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
+ std::string &Error) {
// Test to see if the function is misoptimized if we ONLY run it on the
// functions listed in Funcs.
outs() << "Checking to see if the program is misoptimized when "
@@ -250,14 +254,35 @@ int ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
PrintFunctionList(Funcs);
outs() << '\n';
- // Split the module into the two halves of the program we want.
+ // Create a clone for two reasons:
+ // * If the optimization passes delete any function, the deleted function
+ // will be in the clone and Funcs will still point to valid memory
+ // * If the optimization passes use interprocedural information to break
+ // a function, we want to continue with the original function. Otherwise
+ // we can conclude that a function triggers the bug when in fact one
+ // needs a larger set of original functions to do so.
ValueMap<const Value*, Value*> VMap;
+ Module *Clone = CloneModule(BD.getProgram(), VMap);
+ Module *Orig = BD.swapProgramIn(Clone);
+
+ std::vector<Function*> FuncsOnClone;
+ for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
+ Function *F = cast<Function>(VMap[Funcs[i]]);
+ FuncsOnClone.push_back(F);
+ }
+
+ // Split the module into the two halves of the program we want.
+ VMap.clear();
Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
- Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize, Funcs,
+ Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize, FuncsOnClone,
VMap);
// Run the predicate, note that the predicate will delete both input modules.
- return TestFn(BD, ToOptimize, ToNotOptimize, Error);
+ bool Broken = TestFn(BD, ToOptimize, ToNotOptimize, Error);
+
+ delete BD.swapProgramIn(Orig);
+
+ return Broken;
}
/// DisambiguateGlobalSymbols - Give anonymous global values names.
@@ -307,10 +332,13 @@ static bool ExtractLoops(BugDriver &BD,
// has broken. If something broke, then we'll inform the user and stop
// extraction.
AbstractInterpreter *AI = BD.switchToSafeInterpreter();
- bool Failure = TestMergedProgram(BD, ToOptimizeLoopExtracted, ToNotOptimize,
- false, Error);
- if (!Error.empty())
+ bool Failure;
+ Module *New = TestMergedProgram(BD, ToOptimizeLoopExtracted, ToNotOptimize,
+ false, Error, Failure);
+ if (!New)
return false;
+ // Delete the original and set the new program.
+ delete BD.swapProgramIn(New);
if (Failure) {
BD.switchToInterpreter(AI);
@@ -449,18 +477,36 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
// Split the module into the two halves of the program we want.
ValueMap<const Value*, Value*> VMap;
+ Module *Clone = CloneModule(BD.getProgram(), VMap);
+ Module *Orig = BD.swapProgramIn(Clone);
+ std::vector<Function*> FuncsOnClone;
+ std::vector<BasicBlock*> BBsOnClone;
+ for (unsigned i = 0, e = FunctionsBeingTested.size(); i != e; ++i) {
+ Function *F = cast<Function>(VMap[FunctionsBeingTested[i]]);
+ FuncsOnClone.push_back(F);
+ }
+ for (unsigned i = 0, e = BBs.size(); i != e; ++i) {
+ BasicBlock *BB = cast<BasicBlock>(VMap[BBs[i]]);
+ BBsOnClone.push_back(BB);
+ }
+ VMap.clear();
+
Module *ToNotOptimize = CloneModule(BD.getProgram(), VMap);
Module *ToOptimize = SplitFunctionsOutOfModule(ToNotOptimize,
- FunctionsBeingTested,
+ FuncsOnClone,
VMap);
// Try the extraction. If it doesn't work, then the block extractor crashed
// or something, in which case bugpoint can't chase down this possibility.
- if (Module *New = BD.ExtractMappedBlocksFromModule(BBs, ToOptimize)) {
+ if (Module *New = BD.ExtractMappedBlocksFromModule(BBsOnClone, ToOptimize)) {
delete ToOptimize;
- // Run the predicate, not that the predicate will delete both input modules.
- return TestFn(BD, New, ToNotOptimize, Error);
+ // Run the predicate,
+ // note that the predicate will delete both input modules.
+ bool Ret = TestFn(BD, New, ToNotOptimize, Error);
+ delete BD.swapProgramIn(Orig);
+ return Ret;
}
+ delete BD.swapProgramIn(Orig);
delete ToOptimize;
delete ToNotOptimize;
return false;
@@ -655,8 +701,13 @@ static bool TestOptimizer(BugDriver &BD, Module *Test, Module *Safe,
delete Test;
outs() << " Checking to see if the merged program executes correctly: ";
- bool Broken = TestMergedProgram(BD, Optimized, Safe, true, Error);
- if (Error.empty()) outs() << (Broken ? " nope.\n" : " yup.\n");
+ bool Broken;
+ Module *New = TestMergedProgram(BD, Optimized, Safe, true, Error, Broken);
+ if (New) {
+ outs() << (Broken ? " nope.\n" : " yup.\n");
+ // Delete the original and set the new program.
+ delete BD.swapProgramIn(New);
+ }
return Broken;
}
@@ -678,7 +729,7 @@ void BugDriver::debugMiscompilation(std::string *Error) {
outs() << "\n*** Found miscompiling pass"
<< (getPassesToRun().size() == 1 ? "" : "es") << ": "
<< getPassesString(getPassesToRun()) << '\n';
- EmitProgressBitcode("passinput");
+ EmitProgressBitcode(Program, "passinput");
std::vector<Function *> MiscompiledFunctions =
DebugAMiscompilation(*this, TestOptimizer, *Error);
@@ -694,14 +745,12 @@ void BugDriver::debugMiscompilation(std::string *Error) {
VMap);
outs() << " Non-optimized portion: ";
- ToNotOptimize = swapProgramIn(ToNotOptimize);
- EmitProgressBitcode("tonotoptimize", true);
- setNewProgram(ToNotOptimize); // Delete hacked module.
+ EmitProgressBitcode(ToNotOptimize, "tonotoptimize", true);
+ delete ToNotOptimize; // Delete hacked module.
outs() << " Portion that is input to optimizer: ";
- ToOptimize = swapProgramIn(ToOptimize);
- EmitProgressBitcode("tooptimize");
- setNewProgram(ToOptimize); // Delete hacked module.
+ EmitProgressBitcode(ToOptimize, "tooptimize");
+ delete ToOptimize; // Delete hacked module.
return;
}
@@ -921,7 +970,8 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
// Run the code generator on the `Test' code, loading the shared library.
// The function returns whether or not the new output differs from reference.
- bool Result = BD.diffProgram(TestModuleBC.str(), SharedObject, false, &Error);
+ bool Result = BD.diffProgram(BD.getProgram(), TestModuleBC.str(),
+ SharedObject, false, &Error);
if (!Error.empty())
return false;
@@ -938,7 +988,8 @@ static bool TestCodeGenerator(BugDriver &BD, Module *Test, Module *Safe,
///
bool BugDriver::debugCodeGenerator(std::string *Error) {
if ((void*)SafeInterpreter == (void*)Interpreter) {
- std::string Result = executeProgramSafely("bugpoint.safe.out", Error);
+ std::string Result = executeProgramSafely(Program, "bugpoint.safe.out",
+ Error);
if (Error->empty()) {
outs() << "\n*** The \"safe\" i.e. 'known good' backend cannot match "
<< "the reference diff. This may be due to a\n front-end "
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 3a6149b24a52..3600ca6a81e3 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -27,6 +27,8 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
#include "llvm/System/Program.h"
@@ -51,26 +53,34 @@ namespace {
/// file. If an error occurs, true is returned.
///
bool BugDriver::writeProgramToFile(const std::string &Filename,
- Module *M) const {
+ const Module *M) const {
std::string ErrInfo;
- raw_fd_ostream Out(Filename.c_str(), ErrInfo,
- raw_fd_ostream::F_Binary);
- if (!ErrInfo.empty()) return true;
-
- WriteBitcodeToFile(M ? M : Program, Out);
- return false;
+ tool_output_file Out(Filename.c_str(), ErrInfo,
+ raw_fd_ostream::F_Binary);
+ if (ErrInfo.empty()) {
+ WriteBitcodeToFile(M, Out.os());
+ Out.os().close();
+ if (!Out.os().has_error()) {
+ Out.keep();
+ return false;
+ }
+ }
+ Out.os().clear_error();
+ return true;
}
/// EmitProgressBitcode - This function is used to output the current Program
/// to a file named "bugpoint-ID.bc".
///
-void BugDriver::EmitProgressBitcode(const std::string &ID, bool NoFlyer) {
+void BugDriver::EmitProgressBitcode(const Module *M,
+ const std::string &ID,
+ bool NoFlyer) const {
// Output the input to the current pass to a bitcode file, emit a message
// telling the user how to reproduce it: opt -foo blah.bc
//
std::string Filename = OutputPrefix + "-" + ID + ".bc";
- if (writeProgramToFile(Filename)) {
+ if (writeProgramToFile(Filename, M)) {
errs() << "Error opening file '" << Filename << "' for writing!\n";
return;
}
@@ -83,39 +93,12 @@ void BugDriver::EmitProgressBitcode(const std::string &ID, bool NoFlyer) {
outs() << getPassesString(PassesToRun) << "\n";
}
-int BugDriver::runPassesAsChild(const std::vector<const PassInfo*> &Passes) {
- std::string ErrInfo;
- raw_fd_ostream OutFile(ChildOutput.c_str(), ErrInfo,
- raw_fd_ostream::F_Binary);
- if (!ErrInfo.empty()) {
- errs() << "Error opening bitcode file: " << ChildOutput << "\n";
- return 1;
- }
-
- PassManager PM;
- // Make sure that the appropriate target data is always used...
- PM.add(new TargetData(Program));
-
- for (unsigned i = 0, e = Passes.size(); i != e; ++i) {
- if (Passes[i]->getNormalCtor())
- PM.add(Passes[i]->getNormalCtor()());
- else
- errs() << "Cannot create pass yet: " << Passes[i]->getPassName() << "\n";
- }
- // Check that the module is well formed on completion of optimization
- PM.add(createVerifierPass());
-
- // Write bitcode out to disk as the last step...
- PM.add(createBitcodeWriterPass(OutFile));
-
- // Run all queued passes.
- PM.run(*Program);
-
- return 0;
-}
-
cl::opt<bool> SilencePasses("silence-passes", cl::desc("Suppress output of running passes (both stdout and stderr)"));
+static cl::list<std::string> OptArgs("opt-args", cl::Positional,
+ cl::desc("<opt arguments>..."),
+ cl::ZeroOrMore, cl::PositionalEatsArgs);
+
/// runPasses - Run the specified passes on Program, outputting a bitcode file
/// and writing the filename into OutputFile if successful. If the
/// optimizations fail for some reason (optimizer crashes), return true,
@@ -124,7 +107,8 @@ cl::opt<bool> SilencePasses("silence-passes", cl::desc("Suppress output of runni
/// outs() a single line message indicating whether compilation was successful
/// or failed.
///
-bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
+bool BugDriver::runPasses(Module *Program,
+ const std::vector<std::string> &Passes,
std::string &OutputFilename, bool DeleteOutput,
bool Quiet, unsigned NumExtraArgs,
const char * const *ExtraArgs) const {
@@ -148,39 +132,47 @@ bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
}
std::string ErrInfo;
- raw_fd_ostream InFile(inputFilename.c_str(), ErrInfo,
- raw_fd_ostream::F_Binary);
+ tool_output_file InFile(inputFilename.c_str(), ErrInfo,
+ raw_fd_ostream::F_Binary);
if (!ErrInfo.empty()) {
errs() << "Error opening bitcode file: " << inputFilename.str() << "\n";
return 1;
}
- WriteBitcodeToFile(Program, InFile);
- InFile.close();
+ WriteBitcodeToFile(Program, InFile.os());
+ InFile.os().close();
+ if (InFile.os().has_error()) {
+ errs() << "Error writing bitcode file: " << inputFilename.str() << "\n";
+ InFile.os().clear_error();
+ return 1;
+ }
+ InFile.keep();
// setup the child process' arguments
SmallVector<const char*, 8> Args;
- sys::Path tool = sys::Program::FindProgramByName(ToolName);
+ sys::Path tool = FindExecutable("opt", getToolName(), (void*)"opt");
+ std::string Opt = tool.str();
if (UseValgrind) {
Args.push_back("valgrind");
Args.push_back("--error-exitcode=1");
Args.push_back("-q");
Args.push_back(tool.c_str());
} else
- Args.push_back(ToolName);
+ Args.push_back(Opt.c_str());
- Args.push_back("-as-child");
- Args.push_back("-child-output");
+ Args.push_back("-o");
Args.push_back(OutputFilename.c_str());
+ for (unsigned i = 0, e = OptArgs.size(); i != e; ++i)
+ Args.push_back(OptArgs[i].c_str());
std::vector<std::string> pass_args;
for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) {
pass_args.push_back( std::string("-load"));
pass_args.push_back( PluginLoader::getPlugin(i));
}
- for (std::vector<const PassInfo*>::const_iterator I = Passes.begin(),
+ for (std::vector<std::string>::const_iterator I = Passes.begin(),
E = Passes.end(); I != E; ++I )
- pass_args.push_back( std::string("-") + (*I)->getPassArgument() );
+ pass_args.push_back( std::string("-") + (*I) );
for (std::vector<std::string>::const_iterator I = pass_args.begin(),
E = pass_args.end(); I != E; ++I )
Args.push_back(I->c_str());
@@ -189,6 +181,12 @@ bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
Args.push_back(*ExtraArgs);
Args.push_back(0);
+ DEBUG(errs() << "\nAbout to run:\t";
+ for (unsigned i = 0, e = Args.size()-1; i != e; ++i)
+ errs() << " " << Args[i];
+ errs() << "\n";
+ );
+
sys::Path prog;
if (UseValgrind)
prog = sys::Program::FindProgramByName("valgrind");
@@ -235,27 +233,22 @@ bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
/// module, returning the transformed module on success, or a null pointer on
/// failure.
Module *BugDriver::runPassesOn(Module *M,
- const std::vector<const PassInfo*> &Passes,
+ const std::vector<std::string> &Passes,
bool AutoDebugCrashes, unsigned NumExtraArgs,
const char * const *ExtraArgs) {
- Module *OldProgram = swapProgramIn(M);
std::string BitcodeResult;
- if (runPasses(Passes, BitcodeResult, false/*delete*/, true/*quiet*/,
+ if (runPasses(M, Passes, BitcodeResult, false/*delete*/, true/*quiet*/,
NumExtraArgs, ExtraArgs)) {
if (AutoDebugCrashes) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
- delete OldProgram;
- EmitProgressBitcode("pass-error", false);
+ delete swapProgramIn(M);
+ EmitProgressBitcode(M, "pass-error", false);
exit(debugOptimizerCrash());
}
- swapProgramIn(OldProgram);
return 0;
}
- // Restore the current program.
- swapProgramIn(OldProgram);
-
Module *Ret = ParseInputFile(BitcodeResult, Context);
if (Ret == 0) {
errs() << getToolName() << ": Error reading bitcode file '"
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 3149a7a494a9..36dbe144c1c5 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -627,8 +627,8 @@ CBE *AbstractInterpreter::createCBE(const char *Argv0,
// GCC abstraction
//
-static bool IsARMArchitecture(std::vector<std::string> Args) {
- for (std::vector<std::string>::const_iterator
+static bool IsARMArchitecture(std::vector<const char*> Args) {
+ for (std::vector<const char*>::const_iterator
I = Args.begin(), E = Args.end(); I != E; ++I) {
if (StringRef(*I).equals_lower("-arch")) {
++I;
@@ -673,7 +673,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
// explicitly told what architecture it is working on, so we get
// it from gcc flags
if ((TargetTriple.getOS() == Triple::Darwin) &&
- !IsARMArchitecture(ArgsForGCC))
+ !IsARMArchitecture(GCCArgs))
GCCArgs.push_back("-force_cpusubtype_ALL");
}
}
@@ -721,6 +721,10 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
std::vector<const char*> ProgramArgs;
+ // Declared here so that the destructor only runs after
+ // ProgramArgs is used.
+ std::string Exec;
+
if (RemoteClientPath.isEmpty())
ProgramArgs.push_back(OutputBinary.c_str());
else {
@@ -741,7 +745,7 @@ int GCC::ExecuteProgram(const std::string &ProgramFile,
// Full path to the binary. We need to cd to the exec directory because
// there is a dylib there that the exec expects to find in the CWD
char* env_pwd = getenv("PWD");
- std::string Exec = "cd ";
+ Exec = "cd ";
Exec += env_pwd;
Exec += "; ./";
Exec += OutputBinary.c_str();
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index ba5234bdc89d..79cf563ec6fd 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -29,13 +29,6 @@
#include "llvm/LinkAllVMCore.h"
using namespace llvm;
-// AsChild - Specifies that this invocation of bugpoint is being generated
-// from a parent process. It is not intended to be used by users so the
-// option is hidden.
-static cl::opt<bool>
-AsChild("as-child", cl::desc("Run bugpoint as child process"),
- cl::ReallyHidden);
-
static cl::opt<bool>
FindBugs("find-bugs", cl::desc("Run many different optimization sequences "
"on program to find bugs"), cl::init(false));
@@ -90,8 +83,9 @@ namespace {
AddToDriver(BugDriver &_D) : D(_D) {}
virtual void add(Pass *P) {
- const PassInfo *PI = P->getPassInfo();
- D.addPasses(&PI, &PI + 1);
+ const void *ID = P->getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
+ D.addPass(PI->getPassArgument());
}
};
}
@@ -110,8 +104,8 @@ int main(int argc, char **argv) {
// If we have an override, set it and then track the triple we want Modules
// to use.
if (!OverrideTriple.empty()) {
- TargetTriple.setTriple(OverrideTriple);
- outs() << "Override triple set to '" << OverrideTriple << "'\n";
+ TargetTriple.setTriple(Triple::normalize(OverrideTriple));
+ outs() << "Override triple set to '" << TargetTriple.getTriple() << "'\n";
}
if (MemoryLimit < 0) {
@@ -123,7 +117,7 @@ int main(int argc, char **argv) {
MemoryLimit = 100;
}
- BugDriver D(argv[0], AsChild, FindBugs, TimeoutValue, MemoryLimit,
+ BugDriver D(argv[0], FindBugs, TimeoutValue, MemoryLimit,
UseValgrind, Context);
if (D.addSources(InputFilenames)) return 1;
@@ -143,7 +137,13 @@ int main(int argc, char **argv) {
/*RunInliner=*/true,
/*VerifyEach=*/false);
- D.addPasses(PassList.begin(), PassList.end());
+
+ for (std::vector<const PassInfo*>::iterator I = PassList.begin(),
+ E = PassList.end();
+ I != E; ++I) {
+ const PassInfo* PI = *I;
+ D.addPass(PI->getPassArgument());
+ }
// Bugpoint has the ability of generating a plethora of core files, so to
// avoid filling up the disk, we prevent it
diff --git a/tools/edis/CMakeLists.txt b/tools/edis/CMakeLists.txt
index f7a199d597b4..2019995dcb79 100644
--- a/tools/edis/CMakeLists.txt
+++ b/tools/edis/CMakeLists.txt
@@ -1,22 +1,10 @@
set(LLVM_NO_RTTI 1)
-add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc
- COMMAND ${LLVM_TABLEGEN_EXE} -o ${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc
- -gen-enhanced-disassembly-header ${CMAKE_CURRENT_SOURCE_DIR}/EDInfo.td
- DEPENDS tblgen
- COMMENT "Building enhanced disassembly semantic information header (EDInfo.inc)")
-set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc PROPERTIES GENERATED 1)
-
include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_llvm_library(EnhancedDisassembly
- EDDisassembler.cpp
- EDInst.cpp
- EDMain.cpp
- EDOperand.cpp
- EDToken.cpp
../../include/llvm-c/EnhancedDisassembly.h
- ${CMAKE_CURRENT_BINARY_DIR}/EDInfo.inc
+ EDMain.cpp
)
set_target_properties(EnhancedDisassembly
diff --git a/tools/edis/EDInfo.td b/tools/edis/EDInfo.td
deleted file mode 100644
index bd9ec079d80d..000000000000
--- a/tools/edis/EDInfo.td
+++ /dev/null
@@ -1 +0,0 @@
-// Intentionally empty.
diff --git a/tools/edis/EDMain.cpp b/tools/edis/EDMain.cpp
index b6ca32f2db86..16855b3f45d8 100644
--- a/tools/edis/EDMain.cpp
+++ b/tools/edis/EDMain.cpp
@@ -1,4 +1,4 @@
-//===-EDMain.cpp - LLVM Enhanced Disassembly C API ------------------------===//
+//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,33 +11,46 @@
//
//===----------------------------------------------------------------------===//
-#include "EDDisassembler.h"
-#include "EDInst.h"
-#include "EDOperand.h"
-#include "EDToken.h"
-
+// FIXME: This code isn't layered right, the headers should be moved to
+// include llvm/MC/MCDisassembler or something.
+#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
+#include "../../lib/MC/MCDisassembler/EDInst.h"
+#include "../../lib/MC/MCDisassembler/EDOperand.h"
+#include "../../lib/MC/MCDisassembler/EDToken.h"
#include "llvm-c/EnhancedDisassembly.h"
+using namespace llvm;
int EDGetDisassembler(EDDisassemblerRef *disassembler,
const char *triple,
EDAssemblySyntax_t syntax) {
EDDisassembler::initialize();
- EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple,
- syntax);
+ EDDisassembler::AssemblySyntax Syntax;
+ switch (syntax) {
+ default: assert(0 && "Unknown assembly syntax!");
+ case kEDAssemblySyntaxX86Intel:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel;
+ break;
+ case kEDAssemblySyntaxX86ATT:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT;
+ break;
+ case kEDAssemblySyntaxARMUAL:
+ Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL;
+ break;
+ }
+
+ EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax);
- if (ret) {
- *disassembler = ret;
- return 0;
- } else {
+ if (!ret)
return -1;
- }
+ *disassembler = ret;
+ return 0;
}
int EDGetRegisterName(const char** regName,
EDDisassemblerRef disassembler,
unsigned regID) {
- const char* name = disassembler->nameWithRegisterID(regID);
+ const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID);
if (!name)
return -1;
*regName = name;
@@ -46,24 +59,25 @@ int EDGetRegisterName(const char** regName,
int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
unsigned regID) {
- return disassembler->registerIsStackPointer(regID) ? 1 : 0;
+ return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0;
}
int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
unsigned regID) {
- return disassembler->registerIsProgramCounter(regID) ? 1 : 0;
+ return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0;
}
unsigned int EDCreateInsts(EDInstRef *insts,
unsigned int count,
EDDisassemblerRef disassembler,
- EDByteReaderCallback byteReader,
+ ::EDByteReaderCallback byteReader,
uint64_t address,
void *arg) {
unsigned int index;
for (index = 0; index < count; ++index) {
- EDInst *inst = disassembler->createInst(byteReader, address, arg);
+ EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader,
+ address, arg);
if (!inst)
return index;
@@ -76,163 +90,143 @@ unsigned int EDCreateInsts(EDInstRef *insts,
}
void EDReleaseInst(EDInstRef inst) {
- delete inst;
+ delete ((EDInst*)inst);
}
int EDInstByteSize(EDInstRef inst) {
- return inst->byteSize();
+ return ((EDInst*)inst)->byteSize();
}
int EDGetInstString(const char **buf,
EDInstRef inst) {
- return inst->getString(*buf);
+ return ((EDInst*)inst)->getString(*buf);
}
int EDInstID(unsigned *instID, EDInstRef inst) {
- *instID = inst->instID();
+ *instID = ((EDInst*)inst)->instID();
return 0;
}
int EDInstIsBranch(EDInstRef inst) {
- return inst->isBranch();
+ return ((EDInst*)inst)->isBranch();
}
int EDInstIsMove(EDInstRef inst) {
- return inst->isMove();
+ return ((EDInst*)inst)->isMove();
}
int EDBranchTargetID(EDInstRef inst) {
- return inst->branchTargetID();
+ return ((EDInst*)inst)->branchTargetID();
}
int EDMoveSourceID(EDInstRef inst) {
- return inst->moveSourceID();
+ return ((EDInst*)inst)->moveSourceID();
}
int EDMoveTargetID(EDInstRef inst) {
- return inst->moveTargetID();
+ return ((EDInst*)inst)->moveTargetID();
}
int EDNumTokens(EDInstRef inst) {
- return inst->numTokens();
+ return ((EDInst*)inst)->numTokens();
}
int EDGetToken(EDTokenRef *token,
EDInstRef inst,
int index) {
- return inst->getToken(*token, index);
+ return ((EDInst*)inst)->getToken(*(EDToken**)token, index);
}
int EDGetTokenString(const char **buf,
EDTokenRef token) {
- return token->getString(*buf);
+ return ((EDToken*)token)->getString(*buf);
}
int EDOperandIndexForToken(EDTokenRef token) {
- return token->operandID();
+ return ((EDToken*)token)->operandID();
}
int EDTokenIsWhitespace(EDTokenRef token) {
- if (token->type() == EDToken::kTokenWhitespace)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenWhitespace;
}
int EDTokenIsPunctuation(EDTokenRef token) {
- if (token->type() == EDToken::kTokenPunctuation)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenPunctuation;
}
int EDTokenIsOpcode(EDTokenRef token) {
- if (token->type() == EDToken::kTokenOpcode)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenOpcode;
}
int EDTokenIsLiteral(EDTokenRef token) {
- if (token->type() == EDToken::kTokenLiteral)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenLiteral;
}
int EDTokenIsRegister(EDTokenRef token) {
- if (token->type() == EDToken::kTokenRegister)
- return 1;
- else
- return 0;
+ return ((EDToken*)token)->type() == EDToken::kTokenRegister;
}
int EDTokenIsNegativeLiteral(EDTokenRef token) {
- if (token->type() != EDToken::kTokenLiteral)
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
return -1;
- return token->literalSign();
+ return ((EDToken*)token)->literalSign();
}
-int EDLiteralTokenAbsoluteValue(uint64_t *value,
- EDTokenRef token) {
- if (token->type() != EDToken::kTokenLiteral)
+int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
return -1;
- return token->literalAbsoluteValue(*value);
+ return ((EDToken*)token)->literalAbsoluteValue(*value);
}
int EDRegisterTokenValue(unsigned *registerID,
EDTokenRef token) {
- if (token->type() != EDToken::kTokenRegister)
+ if (((EDToken*)token)->type() != EDToken::kTokenRegister)
return -1;
- return token->registerID(*registerID);
+ return ((EDToken*)token)->registerID(*registerID);
}
int EDNumOperands(EDInstRef inst) {
- return inst->numOperands();
+ return ((EDInst*)inst)->numOperands();
}
int EDGetOperand(EDOperandRef *operand,
EDInstRef inst,
int index) {
- return inst->getOperand(*operand, index);
+ return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index);
}
int EDOperandIsRegister(EDOperandRef operand) {
- return operand->isRegister();
+ return ((EDOperand*)operand)->isRegister();
}
int EDOperandIsImmediate(EDOperandRef operand) {
- return operand->isImmediate();
+ return ((EDOperand*)operand)->isImmediate();
}
int EDOperandIsMemory(EDOperandRef operand) {
- return operand->isMemory();
+ return ((EDOperand*)operand)->isMemory();
}
-int EDRegisterOperandValue(unsigned *value,
- EDOperandRef operand) {
- if (!operand->isRegister())
+int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isRegister())
return -1;
- *value = operand->regVal();
+ *value = ((EDOperand*)operand)->regVal();
return 0;
}
-int EDImmediateOperandValue(uint64_t *value,
- EDOperandRef operand) {
- if (!operand->isImmediate())
+int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isImmediate())
return -1;
- *value = operand->immediateVal();
+ *value = ((EDOperand*)operand)->immediateVal();
return 0;
}
-int EDEvaluateOperand(uint64_t *result,
- EDOperandRef operand,
- EDRegisterReaderCallback regReader,
- void *arg) {
- return operand->evaluate(*result, regReader, arg);
+int EDEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ ::EDRegisterReaderCallback regReader, void *arg) {
+ return ((EDOperand*)operand)->evaluate(*result, regReader, arg);
}
#ifdef __BLOCKS__
@@ -264,15 +258,13 @@ unsigned int EDBlockCreateInsts(EDInstRef *insts,
(void*)&wrapper);
}
-int EDBlockEvaluateOperand(uint64_t *result,
- EDOperandRef operand,
+int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand,
EDRegisterBlock_t regBlock) {
- return operand->evaluate(*result, regBlock);
+ return ((EDOperand*)operand)->evaluate(*result, regBlock);
}
-int EDBlockVisitTokens(EDInstRef inst,
- EDTokenVisitor_t visitor) {
- return inst->visitTokens(visitor);
+int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) {
+ return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor);
}
#else
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
index 0d2e26f23387..92484bf0ac4d 100644
--- a/tools/edis/Makefile
+++ b/tools/edis/Makefile
@@ -9,8 +9,8 @@
LEVEL = ../..
LIBRARYNAME = EnhancedDisassembly
-
-BUILT_SOURCES = EDInfo.inc
+LINK_LIBS_IN_SHARED = 1
+SHARED_LIBRARY = 1
EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/EnhancedDisassembly.exports
@@ -19,23 +19,23 @@ EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/EnhancedDisassembly.exports
# early so we can set up LINK_COMPONENTS before including Makefile.rules
include $(LEVEL)/Makefile.config
-ifeq ($(ENABLE_PIC),1)
- ifneq ($(DISABLE_EDIS),1)
- ifneq ($(TARGET_OS), $(filter $(TARGET_OS), Cygwin MingW))
- LINK_LIBS_IN_SHARED = 1
- SHARED_LIBRARY = 1
- endif
- endif
+LINK_COMPONENTS := mcdisassembler
+
+# If the X86 target is enabled, link in the asmprinter and disassembler.
+ifneq ($(filter $(TARGETS_TO_BUILD), X86),)
+LINK_COMPONENTS += x86asmprinter x86disassembler
endif
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) x86asmprinter x86disassembler
+# If the X86 target is enabled, link in the asmprinter and disassembler.
+ifneq ($(filter $(TARGETS_TO_BUILD), ARM),)
+LINK_COMPONENTS += armasmprinter armdisassembler
+endif
include $(LEVEL)/Makefile.common
ifeq ($(HOST_OS),Darwin)
# extra options to override libtool defaults
LLVMLibsOptions := $(LLVMLibsOptions) \
- -avoid-version \
-Wl,-dead_strip
ifdef EDIS_VERSION
@@ -47,14 +47,8 @@ ifeq ($(HOST_OS),Darwin)
DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/')
ifneq ($(DARWIN_VERS),8)
LLVMLibsOptions := $(LLVMLibsOptions) \
- -no-undefined -Wl,-install_name \
+ -Wl,-install_name \
-Wl,"@rpath/lib$(LIBRARYNAME)$(SHLIBEXT)"
endif
endif
-EDInfo.inc: $(TBLGEN)
- $(Echo) "Building semantic information header"
- $(Verb) $(TableGen) -o $(call SYSPATH, $@) -gen-enhanced-disassembly-header /dev/null
-
-clean::
- -$(Verb) $(RM) -f EDInfo.inc
diff --git a/tools/gold/README.txt b/tools/gold/README.txt
index 92ee3d16afb4..a906a9083ec4 100644
--- a/tools/gold/README.txt
+++ b/tools/gold/README.txt
@@ -14,8 +14,8 @@ Then build binutils with "make all-gold".
To build the LLVMgold plugin, configure LLVM with the option
--with-binutils-include=/path/to/binutils/src/include/ --enable-pic. To use the
-plugin, run "ld-new --plugin /path/to/libLLVMgold.so".
-Without PIC libLTO and libLLVMgold are not being built (because they would fail
+plugin, run "ld-new --plugin /path/to/LLVMgold.so".
+Without PIC libLTO and LLVMgold are not being built (because they would fail
link on x86-64 with a relocation error: PIC and non-PIC can't be combined).
As an alternative to passing --enable-pic, you can use 'make ENABLE_PIC=1' in
your entire LLVM build.
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 2d0f5bd3af3a..4b58fae96de4 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -66,8 +66,11 @@ namespace options {
static generate_bc generate_bc_file = BC_NO;
static std::string bc_path;
static std::string as_path;
+ static std::vector<std::string> as_args;
static std::vector<std::string> pass_through;
static std::string extra_library_path;
+ static std::string triple;
+ static std::string mcpu;
// Additional options to pass into the code generator.
// Note: This array will contain all plugin options which are not claimed
// as plugin exclusive to pass to the code generator.
@@ -83,6 +86,8 @@ namespace options {
if (opt == "generate-api-file") {
generate_api_file = true;
+ } else if (opt.startswith("mcpu=")) {
+ mcpu = opt.substr(strlen("mcpu="));
} else if (opt.startswith("as=")) {
if (!as_path.empty()) {
(*message)(LDPL_WARNING, "Path to as specified twice. "
@@ -90,11 +95,16 @@ namespace options {
} else {
as_path = opt.substr(strlen("as="));
}
+ } else if (opt.startswith("as-arg=")) {
+ llvm::StringRef item = opt.substr(strlen("as-arg="));
+ as_args.push_back(item.str());
} else if (opt.startswith("extra-library-path=")) {
extra_library_path = opt.substr(strlen("extra_library_path="));
} else if (opt.startswith("pass-through=")) {
llvm::StringRef item = opt.substr(strlen("pass-through="));
pass_through.push_back(item.str());
+ } else if (opt.startswith("mtriple=")) {
+ triple = opt.substr(strlen("mtriple="));
} else if (opt == "emit-llvm") {
generate_bc_file = BC_ONLY;
} else if (opt == "also-emit-llvm") {
@@ -270,6 +280,10 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file,
lto_get_error_message());
return LDPS_ERR;
}
+
+ if (!options::triple.empty())
+ lto_module_set_target_triple(cf.M, options::triple.c_str());
+
cf.handle = file->handle;
unsigned sym_count = lto_module_get_num_symbols(cf.M);
cf.syms.reserve(sym_count);
@@ -394,6 +408,17 @@ static ld_plugin_status all_symbols_read_hook(void) {
sys::Path p = sys::Program::FindProgramByName(options::as_path);
lto_codegen_set_assembler_path(cg, p.c_str());
}
+ if (!options::as_args.empty()) {
+ std::vector<const char *> as_args_p;
+ for (std::vector<std::string>::iterator I = options::as_args.begin(),
+ E = options::as_args.end(); I != E; ++I) {
+ as_args_p.push_back(I->c_str());
+ }
+ lto_codegen_set_assembler_args(cg, &as_args_p[0], as_args_p.size());
+ }
+ if (!options::mcpu.empty())
+ lto_codegen_set_cpu(cg, options::mcpu.c_str());
+
// Pass through extra options to the code generator.
if (!options::extra.empty()) {
for (std::vector<std::string>::iterator it = options::extra.begin();
@@ -428,15 +453,22 @@ static ld_plugin_status all_symbols_read_hook(void) {
(*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
return LDPS_ERR;
}
- raw_fd_ostream objFile(uniqueObjPath.c_str(), ErrMsg,
- raw_fd_ostream::F_Binary);
+ tool_output_file objFile(uniqueObjPath.c_str(), ErrMsg,
+ raw_fd_ostream::F_Binary);
if (!ErrMsg.empty()) {
(*message)(LDPL_ERROR, "%s", ErrMsg.c_str());
return LDPS_ERR;
}
- objFile.write(buffer, bufsize);
- objFile.close();
+ objFile.os().write(buffer, bufsize);
+ objFile.os().close();
+ if (objFile.os().has_error()) {
+ (*message)(LDPL_ERROR, "Error writing output file '%s'",
+ uniqueObjPath.c_str());
+ objFile.os().clear_error();
+ return LDPS_ERR;
+ }
+ objFile.keep();
lto_codegen_dispose(cg);
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 199a1a920439..8bcc2d8d27e9 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -18,7 +18,6 @@
#include "llvm/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/Verifier.h"
#include "llvm/Support/IRReader.h"
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -76,6 +75,11 @@ MAttrs("mattr",
cl::desc("Target specific attributes (-mattr=help for details)"),
cl::value_desc("a1,+a2,-a3,..."));
+static cl::opt<bool>
+RelaxAll("mc-relax-all",
+ cl::desc("When used with filetype=obj, "
+ "relax all fixups in the emitted object file"));
+
cl::opt<TargetMachine::CodeGenFileType>
FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
cl::desc("Choose a file type (not all types are supported by all targets):"),
@@ -119,87 +123,67 @@ GetFileNameRoot(const std::string &InputFilename) {
return outputFilename;
}
-static formatted_raw_ostream *GetOutputStream(const char *TargetName,
- Triple::OSType OS,
- const char *ProgName) {
- if (OutputFilename != "") {
- if (OutputFilename == "-")
- return new formatted_raw_ostream(outs(),
- formatted_raw_ostream::PRESERVE_STREAM);
-
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
- std::string error;
- raw_fd_ostream *FDOut =
- new raw_fd_ostream(OutputFilename.c_str(), error,
- raw_fd_ostream::F_Binary);
- if (!error.empty()) {
- errs() << error << '\n';
- delete FDOut;
- return 0;
+static tool_output_file *GetOutputStream(const char *TargetName,
+ Triple::OSType OS,
+ const char *ProgName) {
+ // If we don't yet have an output filename, make one.
+ if (OutputFilename.empty()) {
+ if (InputFilename == "-")
+ OutputFilename = "-";
+ else {
+ OutputFilename = GetFileNameRoot(InputFilename);
+
+ switch (FileType) {
+ default: assert(0 && "Unknown file type");
+ case TargetMachine::CGFT_AssemblyFile:
+ if (TargetName[0] == 'c') {
+ if (TargetName[1] == 0)
+ OutputFilename += ".cbe.c";
+ else if (TargetName[1] == 'p' && TargetName[2] == 'p')
+ OutputFilename += ".cpp";
+ else
+ OutputFilename += ".s";
+ } else
+ OutputFilename += ".s";
+ break;
+ case TargetMachine::CGFT_ObjectFile:
+ if (OS == Triple::Win32)
+ OutputFilename += ".obj";
+ else
+ OutputFilename += ".o";
+ break;
+ case TargetMachine::CGFT_Null:
+ OutputFilename += ".null";
+ break;
+ }
}
- formatted_raw_ostream *Out =
- new formatted_raw_ostream(*FDOut, formatted_raw_ostream::DELETE_STREAM);
-
- return Out;
- }
-
- if (InputFilename == "-") {
- OutputFilename = "-";
- return new formatted_raw_ostream(outs(),
- formatted_raw_ostream::PRESERVE_STREAM);
}
- OutputFilename = GetFileNameRoot(InputFilename);
-
+ // Decide if we need "binary" output.
bool Binary = false;
switch (FileType) {
default: assert(0 && "Unknown file type");
case TargetMachine::CGFT_AssemblyFile:
- if (TargetName[0] == 'c') {
- if (TargetName[1] == 0)
- OutputFilename += ".cbe.c";
- else if (TargetName[1] == 'p' && TargetName[2] == 'p')
- OutputFilename += ".cpp";
- else
- OutputFilename += ".s";
- } else
- OutputFilename += ".s";
break;
case TargetMachine::CGFT_ObjectFile:
- if (OS == Triple::Win32)
- OutputFilename += ".obj";
- else
- OutputFilename += ".o";
- Binary = true;
- break;
case TargetMachine::CGFT_Null:
- OutputFilename += ".null";
Binary = true;
break;
}
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
+ // Open the file.
std::string error;
unsigned OpenFlags = 0;
if (Binary) OpenFlags |= raw_fd_ostream::F_Binary;
- raw_fd_ostream *FDOut = new raw_fd_ostream(OutputFilename.c_str(), error,
- OpenFlags);
+ tool_output_file *FDOut = new tool_output_file(OutputFilename.c_str(), error,
+ OpenFlags);
if (!error.empty()) {
errs() << error << '\n';
delete FDOut;
return 0;
}
- formatted_raw_ostream *Out =
- new formatted_raw_ostream(*FDOut, formatted_raw_ostream::DELETE_STREAM);
-
- return Out;
+ return FDOut;
}
// main - Entry point for the llc compiler.
@@ -234,7 +218,7 @@ int main(int argc, char **argv) {
// If we are supposed to override the target triple, do so now.
if (!TargetTriple.empty())
- mod.setTargetTriple(TargetTriple);
+ mod.setTargetTriple(Triple::normalize(TargetTriple));
Triple TheTriple(mod.getTargetTriple());
if (TheTriple.getTriple().empty())
@@ -290,9 +274,9 @@ int main(int argc, char **argv) {
TargetMachine &Target = *target.get();
// Figure out where we are going to send the output...
- formatted_raw_ostream *Out = GetOutputStream(TheTarget->getName(),
- TheTriple.getOS(), argv[0]);
- if (Out == 0) return 1;
+ OwningPtr<tool_output_file> Out
+ (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]));
+ if (!Out) return 1;
CodeGenOpt::Level OLvl = CodeGenOpt::Default;
switch (OptLevel) {
@@ -306,14 +290,6 @@ int main(int argc, char **argv) {
case '3': OLvl = CodeGenOpt::Aggressive; break;
}
- // Request that addPassesToEmitFile run the Verifier after running
- // passes which modify the IR.
-#ifndef NDEBUG
- bool DisableVerify = false;
-#else
- bool DisableVerify = true;
-#endif
-
// Build up all of the passes that we want to do to the module.
PassManager PM;
@@ -323,27 +299,32 @@ int main(int argc, char **argv) {
else
PM.add(new TargetData(&mod));
- if (!NoVerify)
- PM.add(createVerifierPass());
-
// Override default to generate verbose assembly.
Target.setAsmVerbosityDefault(true);
- // Ask the target to add backend passes as necessary.
- if (Target.addPassesToEmitFile(PM, *Out, FileType, OLvl,
- DisableVerify)) {
- errs() << argv[0] << ": target does not support generation of this"
- << " file type!\n";
- delete Out;
- // And the Out file is empty and useless, so remove it now.
- sys::Path(OutputFilename).eraseFromDisk();
- return 1;
+ if (RelaxAll) {
+ if (FileType != TargetMachine::CGFT_ObjectFile)
+ errs() << argv[0]
+ << ": warning: ignoring -mc-relax-all because filetype != obj";
+ else
+ Target.setMCRelaxAll(true);
}
- PM.run(mod);
+ {
+ formatted_raw_ostream FOS(Out->os());
+
+ // Ask the target to add backend passes as necessary.
+ if (Target.addPassesToEmitFile(PM, FOS, FileType, OLvl, NoVerify)) {
+ errs() << argv[0] << ": target does not support generation of this"
+ << " file type!\n";
+ return 1;
+ }
+
+ PM.run(mod);
+ }
- // Delete the ostream.
- delete Out;
+ // Declare success.
+ Out->keep();
return 0;
}
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 4e3e07ffd22c..4c377805e6a3 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -16,6 +16,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
#include "llvm/ExecutionEngine/GenericValue.h"
@@ -157,7 +158,7 @@ int main(int argc, char **argv, char * const *envp) {
// If we are supposed to override the target triple, do so now.
if (!TargetTriple.empty())
- Mod->setTargetTriple(TargetTriple);
+ Mod->setTargetTriple(Triple::normalize(TargetTriple));
CodeGenOpt::Level OLvl = CodeGenOpt::Default;
switch (OptLevel) {
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index d39d6c8a31f6..1eaa4b3bea44 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -68,22 +68,20 @@ static void WriteOutputFile(const Module *M) {
}
}
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT.
- if (OutputFilename != "-")
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
std::string ErrorInfo;
- std::auto_ptr<raw_ostream> Out
- (new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary));
+ OwningPtr<tool_output_file> Out
+ (new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << '\n';
exit(1);
}
- if (Force || !CheckBitcodeOutputToConsole(*Out, true))
- WriteBitcodeToFile(M, *Out);
+ if (Force || !CheckBitcodeOutputToConsole(Out->os(), true))
+ WriteBitcodeToFile(M, Out->os());
+
+ // Declare success.
+ Out->keep();
}
int main(int argc, char **argv) {
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 732ff11fe46d..9c0d675793d3 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -46,9 +46,6 @@ using namespace llvm;
static cl::opt<std::string>
InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
-static cl::opt<std::string>
- OutputFilename("-o", cl::init("-"), cl::desc("<output file>"));
-
static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"));
//===----------------------------------------------------------------------===//
@@ -239,6 +236,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::FUNC_CODE_INST_VSELECT: return "INST_VSELECT";
case bitc::FUNC_CODE_DEBUG_LOC: return "DEBUG_LOC";
case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: return "DEBUG_LOC_AGAIN";
+ case bitc::FUNC_CODE_INST_CALL2: return "INST_CALL2";
+ case bitc::FUNC_CODE_DEBUG_LOC2: return "DEBUG_LOC2";
}
case bitc::TYPE_SYMTAB_BLOCK_ID:
switch (CodeID) {
@@ -259,13 +258,17 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::METADATA_BLOCK_ID:
switch(CodeID) {
default:return 0;
- case bitc::METADATA_STRING: return "MDSTRING";
- case bitc::METADATA_NODE: return "MDNODE";
- case bitc::METADATA_FN_NODE: return "FN_MDNODE";
+ case bitc::METADATA_STRING: return "METADATA_STRING";
+ case bitc::METADATA_NODE: return "METADATA_NODE";
+ case bitc::METADATA_FN_NODE: return "METADATA_FN_NODE";
case bitc::METADATA_NAME: return "METADATA_NAME";
- case bitc::METADATA_NAMED_NODE: return "NAMEDMDNODE";
+ case bitc::METADATA_NAMED_NODE: return "METADATA_NAMED_NODE";
case bitc::METADATA_KIND: return "METADATA_KIND";
case bitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT";
+ case bitc::METADATA_NODE2: return "METADATA_NODE2";
+ case bitc::METADATA_FN_NODE2: return "METADATA_FN_NODE2";
+ case bitc::METADATA_NAMED_NODE2: return "METADATA_NAMED_NODE2";
+ case bitc::METADATA_ATTACHMENT2: return "METADATA_ATTACHMENT2";
}
}
}
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
index 7a43dbafeb19..663cae5ed2d4 100644
--- a/tools/llvm-config/CMakeLists.txt
+++ b/tools/llvm-config/CMakeLists.txt
@@ -78,7 +78,7 @@ add_custom_command(OUTPUT ${LIBDEPS_TMP}
add_custom_command(OUTPUT ${LIBDEPS}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LIBDEPS_TMP} ${LIBDEPS}
DEPENDS ${LIBDEPS_TMP}
- COMMENT "Updated ${LIBDEPS} because dependencies changed")
+ COMMENT "Updating ${LIBDEPS} if necessary...")
add_custom_command(OUTPUT ${FINAL_LIBDEPS}
COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/find-cycles.pl < ${LIBDEPS} > ${FINAL_LIBDEPS} || ${CMAKE_COMMAND} -E remove -f ${FINAL_LIBDEPS}
diff --git a/tools/llvm-diff/CMakeLists.txt b/tools/llvm-diff/CMakeLists.txt
new file mode 100644
index 000000000000..f6d65c947a58
--- /dev/null
+++ b/tools/llvm-diff/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(LLVM_LINK_COMPONENTS support asmparser bitreader)
+
+add_llvm_tool(llvm-diff
+ llvm-diff.cpp
+ DifferenceEngine.cpp
+ )
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
new file mode 100644
index 000000000000..b0a24d0737ec
--- /dev/null
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -0,0 +1,676 @@
+//===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the implementation of the LLVM difference
+// engine, which structurally compares global values within a module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DifferenceEngine.h"
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/type_traits.h"
+
+#include <utility>
+
+using namespace llvm;
+
+namespace {
+
+/// A priority queue, implemented as a heap.
+template <class T, class Sorter, unsigned InlineCapacity>
+class PriorityQueue {
+ Sorter Precedes;
+ llvm::SmallVector<T, InlineCapacity> Storage;
+
+public:
+ PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
+
+ /// Checks whether the heap is empty.
+ bool empty() const { return Storage.empty(); }
+
+ /// Insert a new value on the heap.
+ void insert(const T &V) {
+ unsigned Index = Storage.size();
+ Storage.push_back(V);
+ if (Index == 0) return;
+
+ T *data = Storage.data();
+ while (true) {
+ unsigned Target = (Index + 1) / 2 - 1;
+ if (!Precedes(data[Index], data[Target])) return;
+ std::swap(data[Index], data[Target]);
+ if (Target == 0) return;
+ Index = Target;
+ }
+ }
+
+ /// Remove the minimum value in the heap. Only valid on a non-empty heap.
+ T remove_min() {
+ assert(!empty());
+ T tmp = Storage[0];
+
+ unsigned NewSize = Storage.size() - 1;
+ if (NewSize) {
+ // Move the slot at the end to the beginning.
+ if (isPodLike<T>::value)
+ Storage[0] = Storage[NewSize];
+ else
+ std::swap(Storage[0], Storage[NewSize]);
+
+ // Bubble the root up as necessary.
+ unsigned Index = 0;
+ while (true) {
+ // With a 1-based index, the children would be Index*2 and Index*2+1.
+ unsigned R = (Index + 1) * 2;
+ unsigned L = R - 1;
+
+ // If R is out of bounds, we're done after this in any case.
+ if (R >= NewSize) {
+ // If L is also out of bounds, we're done immediately.
+ if (L >= NewSize) break;
+
+ // Otherwise, test whether we should swap L and Index.
+ if (Precedes(Storage[L], Storage[Index]))
+ std::swap(Storage[L], Storage[Index]);
+ break;
+ }
+
+ // Otherwise, we need to compare with the smaller of L and R.
+ // Prefer R because it's closer to the end of the array.
+ unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
+
+ // If Index is >= the min of L and R, then heap ordering is restored.
+ if (!Precedes(Storage[IndexToTest], Storage[Index]))
+ break;
+
+ // Otherwise, keep bubbling up.
+ std::swap(Storage[IndexToTest], Storage[Index]);
+ Index = IndexToTest;
+ }
+ }
+ Storage.pop_back();
+
+ return tmp;
+ }
+};
+
+/// A function-scope difference engine.
+class FunctionDifferenceEngine {
+ DifferenceEngine &Engine;
+
+ /// The current mapping from old local values to new local values.
+ DenseMap<Value*, Value*> Values;
+
+ /// The current mapping from old blocks to new blocks.
+ DenseMap<BasicBlock*, BasicBlock*> Blocks;
+
+ DenseSet<std::pair<Value*, Value*> > TentativeValues;
+
+ unsigned getUnprocPredCount(BasicBlock *Block) const {
+ unsigned Count = 0;
+ for (pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E; ++I)
+ if (!Blocks.count(*I)) Count++;
+ return Count;
+ }
+
+ typedef std::pair<BasicBlock*, BasicBlock*> BlockPair;
+
+ /// A type which sorts a priority queue by the number of unprocessed
+ /// predecessor blocks it has remaining.
+ ///
+ /// This is actually really expensive to calculate.
+ struct QueueSorter {
+ const FunctionDifferenceEngine &fde;
+ explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
+
+ bool operator()(const BlockPair &Old, const BlockPair &New) {
+ return fde.getUnprocPredCount(Old.first)
+ < fde.getUnprocPredCount(New.first);
+ }
+ };
+
+ /// A queue of unified blocks to process.
+ PriorityQueue<BlockPair, QueueSorter, 20> Queue;
+
+ /// Try to unify the given two blocks. Enqueues them for processing
+ /// if they haven't already been processed.
+ ///
+ /// Returns true if there was a problem unifying them.
+ bool tryUnify(BasicBlock *L, BasicBlock *R) {
+ BasicBlock *&Ref = Blocks[L];
+
+ if (Ref) {
+ if (Ref == R) return false;
+
+ Engine.logf("successor %l cannot be equivalent to %r; "
+ "it's already equivalent to %r")
+ << L << R << Ref;
+ return true;
+ }
+
+ Ref = R;
+ Queue.insert(BlockPair(L, R));
+ return false;
+ }
+
+ /// Unifies two instructions, given that they're known not to have
+ /// structural differences.
+ void unify(Instruction *L, Instruction *R) {
+ DifferenceEngine::Context C(Engine, L, R);
+
+ bool Result = diff(L, R, true, true);
+ assert(!Result && "structural differences second time around?");
+ (void) Result;
+ if (!L->use_empty())
+ Values[L] = R;
+ }
+
+ void processQueue() {
+ while (!Queue.empty()) {
+ BlockPair Pair = Queue.remove_min();
+ diff(Pair.first, Pair.second);
+ }
+ }
+
+ void diff(BasicBlock *L, BasicBlock *R) {
+ DifferenceEngine::Context C(Engine, L, R);
+
+ BasicBlock::iterator LI = L->begin(), LE = L->end();
+ BasicBlock::iterator RI = R->begin(), RE = R->end();
+
+ llvm::SmallVector<std::pair<Instruction*,Instruction*>, 20> TentativePairs;
+
+ do {
+ assert(LI != LE && RI != RE);
+ Instruction *LeftI = &*LI, *RightI = &*RI;
+
+ // If the instructions differ, start the more sophisticated diff
+ // algorithm at the start of the block.
+ if (diff(LeftI, RightI, false, false)) {
+ TentativeValues.clear();
+ return runBlockDiff(L->begin(), R->begin());
+ }
+
+ // Otherwise, tentatively unify them.
+ if (!LeftI->use_empty())
+ TentativeValues.insert(std::make_pair(LeftI, RightI));
+
+ ++LI, ++RI;
+ } while (LI != LE); // This is sufficient: we can't get equality of
+ // terminators if there are residual instructions.
+
+ // Unify everything in the block, non-tentatively this time.
+ TentativeValues.clear();
+ for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
+ unify(&*LI, &*RI);
+ }
+
+ bool matchForBlockDiff(Instruction *L, Instruction *R);
+ void runBlockDiff(BasicBlock::iterator LI, BasicBlock::iterator RI);
+
+ bool diffCallSites(CallSite L, CallSite R, bool Complain) {
+ // FIXME: call attributes
+ if (!equivalentAsOperands(L.getCalledValue(), R.getCalledValue())) {
+ if (Complain) Engine.log("called functions differ");
+ return true;
+ }
+ if (L.arg_size() != R.arg_size()) {
+ if (Complain) Engine.log("argument counts differ");
+ return true;
+ }
+ for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
+ if (!equivalentAsOperands(L.getArgument(I), R.getArgument(I))) {
+ if (Complain)
+ Engine.logf("arguments %l and %r differ")
+ << L.getArgument(I) << R.getArgument(I);
+ return true;
+ }
+ return false;
+ }
+
+ bool diff(Instruction *L, Instruction *R, bool Complain, bool TryUnify) {
+ // FIXME: metadata (if Complain is set)
+
+ // Different opcodes always imply different operations.
+ if (L->getOpcode() != R->getOpcode()) {
+ if (Complain) Engine.log("different instruction types");
+ return true;
+ }
+
+ if (isa<CmpInst>(L)) {
+ if (cast<CmpInst>(L)->getPredicate()
+ != cast<CmpInst>(R)->getPredicate()) {
+ if (Complain) Engine.log("different predicates");
+ return true;
+ }
+ } else if (isa<CallInst>(L)) {
+ return diffCallSites(CallSite(L), CallSite(R), Complain);
+ } else if (isa<PHINode>(L)) {
+ // FIXME: implement.
+
+ // This is really wierd; type uniquing is broken?
+ if (L->getType() != R->getType()) {
+ if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) {
+ if (Complain) Engine.log("different phi types");
+ return true;
+ }
+ }
+ return false;
+
+ // Terminators.
+ } else if (isa<InvokeInst>(L)) {
+ InvokeInst *LI = cast<InvokeInst>(L);
+ InvokeInst *RI = cast<InvokeInst>(R);
+ if (diffCallSites(CallSite(LI), CallSite(RI), Complain))
+ return true;
+
+ if (TryUnify) {
+ tryUnify(LI->getNormalDest(), RI->getNormalDest());
+ tryUnify(LI->getUnwindDest(), RI->getUnwindDest());
+ }
+ return false;
+
+ } else if (isa<BranchInst>(L)) {
+ BranchInst *LI = cast<BranchInst>(L);
+ BranchInst *RI = cast<BranchInst>(R);
+ if (LI->isConditional() != RI->isConditional()) {
+ if (Complain) Engine.log("branch conditionality differs");
+ return true;
+ }
+
+ if (LI->isConditional()) {
+ if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+ if (Complain) Engine.log("branch conditions differ");
+ return true;
+ }
+ if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
+ }
+ if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
+ return false;
+
+ } else if (isa<SwitchInst>(L)) {
+ SwitchInst *LI = cast<SwitchInst>(L);
+ SwitchInst *RI = cast<SwitchInst>(R);
+ if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
+ if (Complain) Engine.log("switch conditions differ");
+ return true;
+ }
+ if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
+
+ bool Difference = false;
+
+ DenseMap<ConstantInt*,BasicBlock*> LCases;
+ for (unsigned I = 1, E = LI->getNumCases(); I != E; ++I)
+ LCases[LI->getCaseValue(I)] = LI->getSuccessor(I);
+ for (unsigned I = 1, E = RI->getNumCases(); I != E; ++I) {
+ ConstantInt *CaseValue = RI->getCaseValue(I);
+ BasicBlock *LCase = LCases[CaseValue];
+ if (LCase) {
+ if (TryUnify) tryUnify(LCase, RI->getSuccessor(I));
+ LCases.erase(CaseValue);
+ } else if (!Difference) {
+ if (Complain)
+ Engine.logf("right switch has extra case %r") << CaseValue;
+ Difference = true;
+ }
+ }
+ if (!Difference)
+ for (DenseMap<ConstantInt*,BasicBlock*>::iterator
+ I = LCases.begin(), E = LCases.end(); I != E; ++I) {
+ if (Complain)
+ Engine.logf("left switch has extra case %l") << I->first;
+ Difference = true;
+ }
+ return Difference;
+ } else if (isa<UnreachableInst>(L)) {
+ return false;
+ }
+
+ if (L->getNumOperands() != R->getNumOperands()) {
+ if (Complain) Engine.log("instructions have different operand counts");
+ return true;
+ }
+
+ for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
+ Value *LO = L->getOperand(I), *RO = R->getOperand(I);
+ if (!equivalentAsOperands(LO, RO)) {
+ if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool equivalentAsOperands(Constant *L, Constant *R) {
+ // Use equality as a preliminary filter.
+ if (L == R)
+ return true;
+
+ if (L->getValueID() != R->getValueID())
+ return false;
+
+ // Ask the engine about global values.
+ if (isa<GlobalValue>(L))
+ return Engine.equivalentAsOperands(cast<GlobalValue>(L),
+ cast<GlobalValue>(R));
+
+ // Compare constant expressions structurally.
+ if (isa<ConstantExpr>(L))
+ return equivalentAsOperands(cast<ConstantExpr>(L),
+ cast<ConstantExpr>(R));
+
+ // Nulls of the "same type" don't always actually have the same
+ // type; I don't know why. Just white-list them.
+ if (isa<ConstantPointerNull>(L))
+ return true;
+
+ // Block addresses only match if we've already encountered the
+ // block. FIXME: tentative matches?
+ if (isa<BlockAddress>(L))
+ return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
+ == cast<BlockAddress>(R)->getBasicBlock();
+
+ return false;
+ }
+
+ bool equivalentAsOperands(ConstantExpr *L, ConstantExpr *R) {
+ if (L == R)
+ return true;
+ if (L->getOpcode() != R->getOpcode())
+ return false;
+
+ switch (L->getOpcode()) {
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ if (L->getPredicate() != R->getPredicate())
+ return false;
+ break;
+
+ case Instruction::GetElementPtr:
+ // FIXME: inbounds?
+ break;
+
+ default:
+ break;
+ }
+
+ if (L->getNumOperands() != R->getNumOperands())
+ return false;
+
+ for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I)
+ if (!equivalentAsOperands(L->getOperand(I), R->getOperand(I)))
+ return false;
+
+ return true;
+ }
+
+ bool equivalentAsOperands(Value *L, Value *R) {
+ // Fall out if the values have different kind.
+ // This possibly shouldn't take priority over oracles.
+ if (L->getValueID() != R->getValueID())
+ return false;
+
+ // Value subtypes: Argument, Constant, Instruction, BasicBlock,
+ // InlineAsm, MDNode, MDString, PseudoSourceValue
+
+ if (isa<Constant>(L))
+ return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
+
+ if (isa<Instruction>(L))
+ return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
+
+ if (isa<Argument>(L))
+ return Values[L] == R;
+
+ if (isa<BasicBlock>(L))
+ return Blocks[cast<BasicBlock>(L)] != R;
+
+ // Pretend everything else is identical.
+ return true;
+ }
+
+ // Avoid a gcc warning about accessing 'this' in an initializer.
+ FunctionDifferenceEngine *this_() { return this; }
+
+public:
+ FunctionDifferenceEngine(DifferenceEngine &Engine) :
+ Engine(Engine), Queue(QueueSorter(*this_())) {}
+
+ void diff(Function *L, Function *R) {
+ if (L->arg_size() != R->arg_size())
+ Engine.log("different argument counts");
+
+ // Map the arguments.
+ for (Function::arg_iterator
+ LI = L->arg_begin(), LE = L->arg_end(),
+ RI = R->arg_begin(), RE = R->arg_end();
+ LI != LE && RI != RE; ++LI, ++RI)
+ Values[&*LI] = &*RI;
+
+ tryUnify(&*L->begin(), &*R->begin());
+ processQueue();
+ }
+};
+
+struct DiffEntry {
+ DiffEntry() : Cost(0) {}
+
+ unsigned Cost;
+ llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
+};
+
+bool FunctionDifferenceEngine::matchForBlockDiff(Instruction *L,
+ Instruction *R) {
+ return !diff(L, R, false, false);
+}
+
+void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
+ BasicBlock::iterator RStart) {
+ BasicBlock::iterator LE = LStart->getParent()->end();
+ BasicBlock::iterator RE = RStart->getParent()->end();
+
+ unsigned NL = std::distance(LStart, LE);
+
+ SmallVector<DiffEntry, 20> Paths1(NL+1);
+ SmallVector<DiffEntry, 20> Paths2(NL+1);
+
+ DiffEntry *Cur = Paths1.data();
+ DiffEntry *Next = Paths2.data();
+
+ const unsigned LeftCost = 2;
+ const unsigned RightCost = 2;
+ const unsigned MatchCost = 0;
+
+ assert(TentativeValues.empty());
+
+ // Initialize the first column.
+ for (unsigned I = 0; I != NL+1; ++I) {
+ Cur[I].Cost = I * LeftCost;
+ for (unsigned J = 0; J != I; ++J)
+ Cur[I].Path.push_back(DifferenceEngine::DC_left);
+ }
+
+ for (BasicBlock::iterator RI = RStart; RI != RE; ++RI) {
+ // Initialize the first row.
+ Next[0] = Cur[0];
+ Next[0].Cost += RightCost;
+ Next[0].Path.push_back(DifferenceEngine::DC_right);
+
+ unsigned Index = 1;
+ for (BasicBlock::iterator LI = LStart; LI != LE; ++LI, ++Index) {
+ if (matchForBlockDiff(&*LI, &*RI)) {
+ Next[Index] = Cur[Index-1];
+ Next[Index].Cost += MatchCost;
+ Next[Index].Path.push_back(DifferenceEngine::DC_match);
+ TentativeValues.insert(std::make_pair(&*LI, &*RI));
+ } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
+ Next[Index] = Next[Index-1];
+ Next[Index].Cost += LeftCost;
+ Next[Index].Path.push_back(DifferenceEngine::DC_left);
+ } else {
+ Next[Index] = Cur[Index];
+ Next[Index].Cost += RightCost;
+ Next[Index].Path.push_back(DifferenceEngine::DC_right);
+ }
+ }
+
+ std::swap(Cur, Next);
+ }
+
+ // We don't need the tentative values anymore; everything from here
+ // on out should be non-tentative.
+ TentativeValues.clear();
+
+ SmallVectorImpl<char> &Path = Cur[NL].Path;
+ BasicBlock::iterator LI = LStart, RI = RStart;
+
+ DifferenceEngine::DiffLogBuilder Diff(Engine);
+
+ // Drop trailing matches.
+ while (Path.back() == DifferenceEngine::DC_match)
+ Path.pop_back();
+
+ // Skip leading matches.
+ SmallVectorImpl<char>::iterator
+ PI = Path.begin(), PE = Path.end();
+ while (PI != PE && *PI == DifferenceEngine::DC_match) {
+ unify(&*LI, &*RI);
+ ++PI, ++LI, ++RI;
+ }
+
+ for (; PI != PE; ++PI) {
+ switch (static_cast<DifferenceEngine::DiffChange>(*PI)) {
+ case DifferenceEngine::DC_match:
+ assert(LI != LE && RI != RE);
+ {
+ Instruction *L = &*LI, *R = &*RI;
+ unify(L, R);
+ Diff.addMatch(L, R);
+ }
+ ++LI; ++RI;
+ break;
+
+ case DifferenceEngine::DC_left:
+ assert(LI != LE);
+ Diff.addLeft(&*LI);
+ ++LI;
+ break;
+
+ case DifferenceEngine::DC_right:
+ assert(RI != RE);
+ Diff.addRight(&*RI);
+ ++RI;
+ break;
+ }
+ }
+
+ // Finishing unifying and complaining about the tails of the block,
+ // which should be matches all the way through.
+ while (LI != LE) {
+ assert(RI != RE);
+ unify(&*LI, &*RI);
+ ++LI, ++RI;
+ }
+
+ // If the terminators have different kinds, but one is an invoke and the
+ // other is an unconditional branch immediately following a call, unify
+ // the results and the destinations.
+ TerminatorInst *LTerm = LStart->getParent()->getTerminator();
+ TerminatorInst *RTerm = RStart->getParent()->getTerminator();
+ if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
+ if (cast<BranchInst>(LTerm)->isConditional()) return;
+ BasicBlock::iterator I = LTerm;
+ if (I == LStart->getParent()->begin()) return;
+ --I;
+ if (!isa<CallInst>(*I)) return;
+ CallInst *LCall = cast<CallInst>(&*I);
+ InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
+ if (!equivalentAsOperands(LCall->getCalledValue(), RInvoke->getCalledValue()))
+ return;
+ if (!LCall->use_empty())
+ Values[LCall] = RInvoke;
+ tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
+ } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
+ if (cast<BranchInst>(RTerm)->isConditional()) return;
+ BasicBlock::iterator I = RTerm;
+ if (I == RStart->getParent()->begin()) return;
+ --I;
+ if (!isa<CallInst>(*I)) return;
+ CallInst *RCall = cast<CallInst>(I);
+ InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
+ if (!equivalentAsOperands(LInvoke->getCalledValue(), RCall->getCalledValue()))
+ return;
+ if (!LInvoke->use_empty())
+ Values[LInvoke] = RCall;
+ tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
+ }
+}
+
+}
+
+void DifferenceEngine::diff(Function *L, Function *R) {
+ Context C(*this, L, R);
+
+ // FIXME: types
+ // FIXME: attributes and CC
+ // FIXME: parameter attributes
+
+ // If both are declarations, we're done.
+ if (L->empty() && R->empty())
+ return;
+ else if (L->empty())
+ log("left function is declaration, right function is definition");
+ else if (R->empty())
+ log("right function is declaration, left function is definition");
+ else
+ FunctionDifferenceEngine(*this).diff(L, R);
+}
+
+void DifferenceEngine::diff(Module *L, Module *R) {
+ StringSet<> LNames;
+ SmallVector<std::pair<Function*,Function*>, 20> Queue;
+
+ for (Module::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Function *LFn = &*I;
+ LNames.insert(LFn->getName());
+
+ if (Function *RFn = R->getFunction(LFn->getName()))
+ Queue.push_back(std::make_pair(LFn, RFn));
+ else
+ logf("function %l exists only in left module") << LFn;
+ }
+
+ for (Module::iterator I = R->begin(), E = R->end(); I != E; ++I) {
+ Function *RFn = &*I;
+ if (!LNames.count(RFn->getName()))
+ logf("function %r exists only in right module") << RFn;
+ }
+
+ for (SmallVectorImpl<std::pair<Function*,Function*> >::iterator
+ I = Queue.begin(), E = Queue.end(); I != E; ++I)
+ diff(I->first, I->second);
+}
+
+bool DifferenceEngine::equivalentAsOperands(GlobalValue *L, GlobalValue *R) {
+ if (globalValueOracle) return (*globalValueOracle)(L, R);
+ return L->getName() == R->getName();
+}
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
new file mode 100644
index 000000000000..6eefb06118fa
--- /dev/null
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -0,0 +1,179 @@
+//===-- DifferenceEngine.h - Module comparator ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the interface to the LLVM difference engine,
+// which structurally compares functions within a module.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LLVM_DIFFERENCE_ENGINE_H_
+#define _LLVM_DIFFERENCE_ENGINE_H_
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <utility>
+
+namespace llvm {
+ class Function;
+ class GlobalValue;
+ class Instruction;
+ class LLVMContext;
+ class Module;
+ class Twine;
+ class Value;
+
+ /// A class for performing structural comparisons of LLVM assembly.
+ class DifferenceEngine {
+ public:
+ /// A temporary-object class for building up log messages.
+ class LogBuilder {
+ DifferenceEngine &Engine;
+
+ /// The use of a stored StringRef here is okay because
+ /// LogBuilder should be used only as a temporary, and as a
+ /// temporary it will be destructed before whatever temporary
+ /// might be initializing this format.
+ StringRef Format;
+
+ SmallVector<Value*, 4> Arguments;
+
+ public:
+ LogBuilder(DifferenceEngine &Engine, StringRef Format)
+ : Engine(Engine), Format(Format) {}
+
+ LogBuilder &operator<<(Value *V) {
+ Arguments.push_back(V);
+ return *this;
+ }
+
+ ~LogBuilder() {
+ Engine.consumer.logf(*this);
+ }
+
+ StringRef getFormat() const { return Format; }
+
+ unsigned getNumArguments() const { return Arguments.size(); }
+ Value *getArgument(unsigned I) const { return Arguments[I]; }
+ };
+
+ enum DiffChange { DC_match, DC_left, DC_right };
+
+ /// A temporary-object class for building up diff messages.
+ class DiffLogBuilder {
+ typedef std::pair<Instruction*,Instruction*> DiffRecord;
+ SmallVector<DiffRecord, 20> Diff;
+
+ DifferenceEngine &Engine;
+
+ public:
+ DiffLogBuilder(DifferenceEngine &Engine) : Engine(Engine) {}
+ ~DiffLogBuilder() { Engine.consumer.logd(*this); }
+
+ void addMatch(Instruction *L, Instruction *R) {
+ Diff.push_back(DiffRecord(L, R));
+ }
+ void addLeft(Instruction *L) {
+ // HACK: VS 2010 has a bug in the stdlib that requires this.
+ Diff.push_back(DiffRecord(L, DiffRecord::second_type(0)));
+ }
+ void addRight(Instruction *R) {
+ // HACK: VS 2010 has a bug in the stdlib that requires this.
+ Diff.push_back(DiffRecord(DiffRecord::first_type(0), R));
+ }
+
+ unsigned getNumLines() const { return Diff.size(); }
+ DiffChange getLineKind(unsigned I) const {
+ return (Diff[I].first ? (Diff[I].second ? DC_match : DC_left)
+ : DC_right);
+ }
+ Instruction *getLeft(unsigned I) const { return Diff[I].first; }
+ Instruction *getRight(unsigned I) const { return Diff[I].second; }
+ };
+
+ /// The interface for consumers of difference data.
+ struct Consumer {
+ /// Record that a local context has been entered. Left and
+ /// Right are IR "containers" of some sort which are being
+ /// considered for structural equivalence: global variables,
+ /// functions, blocks, instructions, etc.
+ virtual void enterContext(Value *Left, Value *Right) = 0;
+
+ /// Record that a local context has been exited.
+ virtual void exitContext() = 0;
+
+ /// Record a difference within the current context.
+ virtual void log(StringRef Text) = 0;
+
+ /// Record a formatted difference within the current context.
+ virtual void logf(const LogBuilder &Log) = 0;
+
+ /// Record a line-by-line instruction diff.
+ virtual void logd(const DiffLogBuilder &Log) = 0;
+
+ protected:
+ virtual ~Consumer() {}
+ };
+
+ /// A RAII object for recording the current context.
+ struct Context {
+ Context(DifferenceEngine &Engine, Value *L, Value *R) : Engine(Engine) {
+ Engine.consumer.enterContext(L, R);
+ }
+
+ ~Context() {
+ Engine.consumer.exitContext();
+ }
+
+ private:
+ DifferenceEngine &Engine;
+ };
+
+ /// An oracle for answering whether two values are equivalent as
+ /// operands.
+ struct Oracle {
+ virtual bool operator()(Value *L, Value *R) = 0;
+
+ protected:
+ virtual ~Oracle() {}
+ };
+
+ DifferenceEngine(LLVMContext &context, Consumer &consumer)
+ : context(context), consumer(consumer), globalValueOracle(0) {}
+
+ void diff(Module *L, Module *R);
+ void diff(Function *L, Function *R);
+
+ void log(StringRef text) {
+ consumer.log(text);
+ }
+
+ LogBuilder logf(StringRef text) {
+ return LogBuilder(*this, text);
+ }
+
+ /// Installs an oracle to decide whether two global values are
+ /// equivalent as operands. Without an oracle, global values are
+ /// considered equivalent as operands precisely when they have the
+ /// same name.
+ void setGlobalValueOracle(Oracle *oracle) {
+ globalValueOracle = oracle;
+ }
+
+ /// Determines whether two global values are equivalent.
+ bool equivalentAsOperands(GlobalValue *L, GlobalValue *R);
+
+ private:
+ LLVMContext &context;
+ Consumer &consumer;
+ Oracle *globalValueOracle;
+ };
+}
+
+#endif
diff --git a/tools/llvmc/example/Skeleton/plugins/Plugin/Makefile b/tools/llvm-diff/Makefile
index 54f722165dc7..58e49fa95962 100644
--- a/tools/llvmc/example/Skeleton/plugins/Plugin/Makefile
+++ b/tools/llvm-diff/Makefile
@@ -1,17 +1,17 @@
-##===- llvmc/example/Skeleton/plugins/Plugin/Makefile ------*- Makefile -*-===##
-#
+##===- tools/llvm-diff/Makefile ----------------------------*- Makefile -*-===##
+#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
-#
+#
##===----------------------------------------------------------------------===##
-LEVEL = $(LLVMC_BASE_LEVEL)/../..
-
-# Change this to the name of your plugin.
-LLVMC_PLUGIN = Plugin
+LEVEL = ../..
+TOOLNAME = llvm-diff
+LINK_COMPONENTS := asmparser bitreader
-BUILT_SOURCES = AutoGenerated.inc
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
new file mode 100644
index 000000000000..16a990fb2812
--- /dev/null
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -0,0 +1,331 @@
+//===-- llvm-diff.cpp - Module comparator command-line driver ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the command-line driver for the difference engine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DifferenceEngine.h"
+
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+
+#include <string>
+#include <utility>
+
+
+using namespace llvm;
+
+/// Reads a module from a file. If the filename ends in .ll, it is
+/// interpreted as an assembly file; otherwise, it is interpreted as
+/// bitcode. On error, messages are written to stderr and null is
+/// returned.
+static Module *ReadModule(LLVMContext &Context, StringRef Name) {
+ // LLVM assembly path.
+ if (Name.endswith(".ll")) {
+ SMDiagnostic Diag;
+ Module *M = ParseAssemblyFile(Name, Diag, Context);
+ if (M) return M;
+
+ Diag.Print("llvmdiff", errs());
+ return 0;
+ }
+
+ // Bitcode path.
+ MemoryBuffer *Buffer = MemoryBuffer::getFile(Name);
+
+ // ParseBitcodeFile takes ownership of the buffer if it succeeds.
+ std::string Error;
+ Module *M = ParseBitcodeFile(Buffer, Context, &Error);
+ if (M) return M;
+
+ errs() << "error parsing " << Name << ": " << Error;
+ delete Buffer;
+ return 0;
+}
+
+namespace {
+struct DiffContext {
+ DiffContext(Value *L, Value *R)
+ : L(L), R(R), Differences(false), IsFunction(isa<Function>(L)) {}
+ Value *L;
+ Value *R;
+ bool Differences;
+ bool IsFunction;
+ DenseMap<Value*,unsigned> LNumbering;
+ DenseMap<Value*,unsigned> RNumbering;
+};
+
+void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering) {
+ unsigned IN = 0;
+
+ // Arguments get the first numbers.
+ for (Function::arg_iterator
+ AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI)
+ if (!AI->hasName())
+ Numbering[&*AI] = IN++;
+
+ // Walk the basic blocks in order.
+ for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) {
+ if (!FI->hasName())
+ Numbering[&*FI] = IN++;
+
+ // Walk the instructions in order.
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI)
+ // void instructions don't get numbers.
+ if (!BI->hasName() && !BI->getType()->isVoidTy())
+ Numbering[&*BI] = IN++;
+ }
+
+ assert(!Numbering.empty() && "asked for numbering but numbering was no-op");
+}
+
+class DiffConsumer : public DifferenceEngine::Consumer {
+private:
+ raw_ostream &out;
+ Module *LModule;
+ Module *RModule;
+ SmallVector<DiffContext, 5> contexts;
+ bool Differences;
+ unsigned Indent;
+
+ void printValue(Value *V, bool isL) {
+ if (V->hasName()) {
+ out << (isa<GlobalValue>(V) ? '@' : '%') << V->getName();
+ return;
+ }
+ if (V->getType()->isVoidTy()) {
+ if (isa<StoreInst>(V)) {
+ out << "store to ";
+ printValue(cast<StoreInst>(V)->getPointerOperand(), isL);
+ } else if (isa<CallInst>(V)) {
+ out << "call to ";
+ printValue(cast<CallInst>(V)->getCalledValue(), isL);
+ } else if (isa<InvokeInst>(V)) {
+ out << "invoke to ";
+ printValue(cast<InvokeInst>(V)->getCalledValue(), isL);
+ } else {
+ out << *V;
+ }
+ return;
+ }
+
+ unsigned N = contexts.size();
+ while (N > 0) {
+ --N;
+ DiffContext &ctxt = contexts[N];
+ if (!ctxt.IsFunction) continue;
+ if (isL) {
+ if (ctxt.LNumbering.empty())
+ ComputeNumbering(cast<Function>(ctxt.L), ctxt.LNumbering);
+ out << '%' << ctxt.LNumbering[V];
+ return;
+ } else {
+ if (ctxt.RNumbering.empty())
+ ComputeNumbering(cast<Function>(ctxt.R), ctxt.RNumbering);
+ out << '%' << ctxt.RNumbering[V];
+ return;
+ }
+ }
+
+ out << "<anonymous>";
+ }
+
+ void header() {
+ if (contexts.empty()) return;
+ for (SmallVectorImpl<DiffContext>::iterator
+ I = contexts.begin(), E = contexts.end(); I != E; ++I) {
+ if (I->Differences) continue;
+ if (isa<Function>(I->L)) {
+ // Extra newline between functions.
+ if (Differences) out << "\n";
+
+ Function *L = cast<Function>(I->L);
+ Function *R = cast<Function>(I->R);
+ if (L->getName() != R->getName())
+ out << "in function " << L->getName()
+ << " / " << R->getName() << ":\n";
+ else
+ out << "in function " << L->getName() << ":\n";
+ } else if (isa<BasicBlock>(I->L)) {
+ BasicBlock *L = cast<BasicBlock>(I->L);
+ BasicBlock *R = cast<BasicBlock>(I->R);
+ if (L->hasName() && R->hasName() && L->getName() == R->getName())
+ out << " in block %" << L->getName() << ":\n";
+ else {
+ out << " in block ";
+ printValue(L, true);
+ out << " / ";
+ printValue(R, false);
+ out << ":\n";
+ }
+ } else if (isa<Instruction>(I->L)) {
+ out << " in instruction ";
+ printValue(I->L, true);
+ out << " / ";
+ printValue(I->R, false);
+ out << ":\n";
+ }
+
+ I->Differences = true;
+ }
+ }
+
+ void indent() {
+ unsigned N = Indent;
+ while (N--) out << ' ';
+ }
+
+public:
+ DiffConsumer(Module *L, Module *R)
+ : out(errs()), LModule(L), RModule(R), Differences(false), Indent(0) {}
+
+ bool hadDifferences() const { return Differences; }
+
+ void enterContext(Value *L, Value *R) {
+ contexts.push_back(DiffContext(L, R));
+ Indent += 2;
+ }
+ void exitContext() {
+ Differences |= contexts.back().Differences;
+ contexts.pop_back();
+ Indent -= 2;
+ }
+
+ void log(StringRef text) {
+ header();
+ indent();
+ out << text << '\n';
+ }
+
+ void logf(const DifferenceEngine::LogBuilder &Log) {
+ header();
+ indent();
+
+ unsigned arg = 0;
+
+ StringRef format = Log.getFormat();
+ while (true) {
+ size_t percent = format.find('%');
+ if (percent == StringRef::npos) {
+ out << format;
+ break;
+ }
+ assert(format[percent] == '%');
+
+ if (percent > 0) out << format.substr(0, percent);
+
+ switch (format[percent+1]) {
+ case '%': out << '%'; break;
+ case 'l': printValue(Log.getArgument(arg++), true); break;
+ case 'r': printValue(Log.getArgument(arg++), false); break;
+ default: llvm_unreachable("unknown format character");
+ }
+
+ format = format.substr(percent+2);
+ }
+
+ out << '\n';
+ }
+
+ void logd(const DifferenceEngine::DiffLogBuilder &Log) {
+ header();
+
+ for (unsigned I = 0, E = Log.getNumLines(); I != E; ++I) {
+ indent();
+ switch (Log.getLineKind(I)) {
+ case DifferenceEngine::DC_match:
+ out << " ";
+ Log.getLeft(I)->dump();
+ //printValue(Log.getLeft(I), true);
+ break;
+ case DifferenceEngine::DC_left:
+ out << "< ";
+ Log.getLeft(I)->dump();
+ //printValue(Log.getLeft(I), true);
+ break;
+ case DifferenceEngine::DC_right:
+ out << "> ";
+ Log.getRight(I)->dump();
+ //printValue(Log.getRight(I), false);
+ break;
+ }
+ //out << "\n";
+ }
+ }
+
+};
+}
+
+static void diffGlobal(DifferenceEngine &Engine, Module *L, Module *R,
+ StringRef Name) {
+ // Drop leading sigils from the global name.
+ if (Name.startswith("@")) Name = Name.substr(1);
+
+ Function *LFn = L->getFunction(Name);
+ Function *RFn = R->getFunction(Name);
+ if (LFn && RFn)
+ Engine.diff(LFn, RFn);
+ else if (!LFn && !RFn)
+ errs() << "No function named @" << Name << " in either module\n";
+ else if (!LFn)
+ errs() << "No function named @" << Name << " in left module\n";
+ else
+ errs() << "No function named @" << Name << " in right module\n";
+}
+
+cl::opt<std::string> LeftFilename(cl::Positional,
+ cl::desc("<first file>"),
+ cl::Required);
+cl::opt<std::string> RightFilename(cl::Positional,
+ cl::desc("<second file>"),
+ cl::Required);
+cl::list<std::string> GlobalsToCompare(cl::Positional,
+ cl::desc("<globals to compare>"));
+
+int main(int argc, char **argv) {
+ cl::ParseCommandLineOptions(argc, argv);
+
+ LLVMContext Context;
+
+ // Load both modules. Die if that fails.
+ Module *LModule = ReadModule(Context, LeftFilename);
+ Module *RModule = ReadModule(Context, RightFilename);
+ if (!LModule || !RModule) return 1;
+
+ DiffConsumer Consumer(LModule, RModule);
+ DifferenceEngine Engine(Context, Consumer);
+
+ // If any global names were given, just diff those.
+ if (!GlobalsToCompare.empty()) {
+ for (unsigned I = 0, E = GlobalsToCompare.size(); I != E; ++I)
+ diffGlobal(Engine, LModule, RModule, GlobalsToCompare[I]);
+
+ // Otherwise, diff everything in the module.
+ } else {
+ Engine.diff(LModule, RModule);
+ }
+
+ delete LModule;
+ delete RModule;
+
+ return Consumer.hadDifferences();
+}
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index b8b1a39384cd..9d2d31da164d 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -18,14 +18,16 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Type.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
-#include <memory>
using namespace llvm;
static cl::opt<std::string>
@@ -41,6 +43,29 @@ Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
DontPrint("disable-output", cl::desc("Don't output the .ll file"), cl::Hidden);
+static cl::opt<bool>
+ShowAnnotations("show-annotations",
+ cl::desc("Add informational comments to the .ll file"));
+
+namespace {
+
+class CommentWriter : public AssemblyAnnotationWriter {
+public:
+ void emitFunctionAnnot(const Function *F,
+ formatted_raw_ostream &OS) {
+ OS << "; [#uses=" << F->getNumUses() << ']'; // Output # uses
+ OS << '\n';
+ }
+ void printInfoComment(const Value &V, formatted_raw_ostream &OS) {
+ if (V.getType()->isVoidTy()) return;
+
+ OS.PadToColumn(50);
+ OS << "; [#uses=" << V.getNumUses() << ']'; // Output # uses
+ }
+};
+
+} // end anon namespace
+
int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
@@ -88,23 +113,25 @@ int main(int argc, char **argv) {
}
}
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT.
- if (OutputFilename != "-")
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
std::string ErrorInfo;
- std::auto_ptr<raw_fd_ostream>
- Out(new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary));
+ OwningPtr<tool_output_file>
+ Out(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << '\n';
return 1;
}
+ OwningPtr<AssemblyAnnotationWriter> Annotator;
+ if (ShowAnnotations)
+ Annotator.reset(new CommentWriter());
+
// All that llvm-dis does is write the assembly to a file.
if (!DontPrint)
- *Out << *M;
+ M->print(Out->os(), Annotator.get());
+
+ // Declare success.
+ Out->keep();
return 0;
}
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index e6b5b8465d7b..91a59e5a56da 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -44,10 +44,6 @@ Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
DeleteFn("delete", cl::desc("Delete specified Globals from Module"));
-static cl::opt<bool>
-Relink("relink",
- cl::desc("Turn external linkage for callees of function to delete"));
-
// ExtractFuncs - The functions to extract from the module...
static cl::list<std::string>
ExtractFuncs("func", cl::desc("Specify function to extract"),
@@ -71,9 +67,10 @@ int main(int argc, char **argv) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm extractor\n");
+ // Use lazy loading, since we only care about selected global values.
SMDiagnostic Err;
std::auto_ptr<Module> M;
- M.reset(ParseIRFile(InputFilename, Err, Context));
+ M.reset(getLazyIRFileModule(InputFilename, Err, Context));
if (M.get() == 0) {
Err.Print(argv[0], errs());
@@ -104,36 +101,47 @@ int main(int argc, char **argv) {
GVs.push_back(GV);
}
+ // Materialize requisite global values.
+ for (size_t i = 0, e = GVs.size(); i != e; ++i) {
+ GlobalValue *GV = GVs[i];
+ if (GV->isMaterializable()) {
+ std::string ErrInfo;
+ if (GV->Materialize(&ErrInfo)) {
+ errs() << argv[0] << ": error reading input: " << ErrInfo << "\n";
+ return 1;
+ }
+ }
+ }
+
// In addition to deleting all other functions, we also want to spiff it
// up a little bit. Do this now.
PassManager Passes;
Passes.add(new TargetData(M.get())); // Use correct TargetData
- Passes.add(createGVExtractionPass(GVs, DeleteFn, Relink));
+ Passes.add(createGVExtractionPass(GVs, DeleteFn));
if (!DeleteFn)
Passes.add(createGlobalDCEPass()); // Delete unreachable globals
Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info
Passes.add(createDeadTypeEliminationPass()); // Remove dead types...
Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls
- // Make sure that the Output file gets unlinked from the disk if we get a
- // SIGINT
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
std::string ErrorInfo;
- raw_fd_ostream Out(OutputFilename.c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
+ tool_output_file Out(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << '\n';
return 1;
}
if (OutputAssembly)
- Passes.add(createPrintModulePass(&Out));
- else if (Force || !CheckBitcodeOutputToConsole(Out, true))
- Passes.add(createBitcodeWriterPass(Out));
+ Passes.add(createPrintModulePass(&Out.os()));
+ else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true))
+ Passes.add(createBitcodeWriterPass(Out.os()));
Passes.run(*M.get());
+ // Declare success.
+ Out.keep();
+
return 0;
}
diff --git a/tools/llvm-ld/llvm-ld.cpp b/tools/llvm-ld/llvm-ld.cpp
index ad6956cabd24..3bbea9dc7287 100644
--- a/tools/llvm-ld/llvm-ld.cpp
+++ b/tools/llvm-ld/llvm-ld.cpp
@@ -145,8 +145,8 @@ static void PrintCommand(const std::vector<const char*> &args) {
std::vector<const char*>::const_iterator I = args.begin(), E = args.end();
for (; I != E; ++I)
if (*I)
- outs() << "'" << *I << "'" << " ";
- outs() << "\n"; outs().flush();
+ errs() << "'" << *I << "'" << " ";
+ errs() << "\n";
}
/// CopyEnv - This function takes an array of environment variables and makes a
@@ -232,17 +232,20 @@ static void RemoveEnv(const char * name, char ** const envp) {
void GenerateBitcode(Module* M, const std::string& FileName) {
if (Verbose)
- outs() << "Generating Bitcode To " << FileName << '\n';
+ errs() << "Generating Bitcode To " << FileName << '\n';
// Create the output file.
std::string ErrorInfo;
- raw_fd_ostream Out(FileName.c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!ErrorInfo.empty())
+ tool_output_file Out(FileName.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!ErrorInfo.empty()) {
PrintAndExit(ErrorInfo, M);
+ return;
+ }
// Write it out
- WriteBitcodeToFile(M, Out);
+ WriteBitcodeToFile(M, Out.os());
+ Out.keep();
}
/// GenerateAssembly - generates a native assembly language source file from the
@@ -272,7 +275,7 @@ static int GenerateAssembly(const std::string &OutputFilename,
args.push_back(0);
if (Verbose) {
- outs() << "Generating Assembly With: \n";
+ errs() << "Generating Assembly With: \n";
PrintCommand(args);
}
@@ -294,7 +297,7 @@ static int GenerateCFile(const std::string &OutputFile,
args.push_back(0);
if (Verbose) {
- outs() << "Generating C Source With: \n";
+ errs() << "Generating C Source With: \n";
PrintCommand(args);
}
@@ -391,7 +394,7 @@ static int GenerateNative(const std::string &OutputFilename,
Args.push_back(0);
if (Verbose) {
- outs() << "Generating Native Executable With:\n";
+ errs() << "Generating Native Executable With:\n";
PrintCommand(Args);
}
@@ -406,7 +409,7 @@ static int GenerateNative(const std::string &OutputFilename,
/// bitcode file for the program.
static void EmitShellScript(char **argv, Module *M) {
if (Verbose)
- outs() << "Emitting Shell Script\n";
+ errs() << "Emitting Shell Script\n";
#if defined(_WIN32) || defined(__CYGWIN__)
// Windows doesn't support #!/bin/sh style shell scripts in .exe files. To
// support windows systems, we copy the llvm-stub.exe executable from the
@@ -425,14 +428,14 @@ static void EmitShellScript(char **argv, Module *M) {
// Output the script to start the program...
std::string ErrorInfo;
- raw_fd_ostream Out2(OutputFilename.c_str(), ErrorInfo);
+ tool_output_file Out2(OutputFilename.c_str(), ErrorInfo);
if (!ErrorInfo.empty())
PrintAndExit(ErrorInfo, M);
- Out2 << "#!/bin/sh\n";
+ Out2.os() << "#!/bin/sh\n";
// Allow user to setenv LLVMINTERP if lli is not in their PATH.
- Out2 << "lli=${LLVMINTERP-lli}\n";
- Out2 << "exec $lli \\\n";
+ Out2.os() << "lli=${LLVMINTERP-lli}\n";
+ Out2.os() << "exec $lli \\\n";
// gcc accepts -l<lib> and implicitly searches /lib and /usr/lib.
LibPaths.push_back("/lib");
LibPaths.push_back("/usr/lib");
@@ -463,9 +466,10 @@ static void EmitShellScript(char **argv, Module *M) {
if (FullLibraryPath.isEmpty())
FullLibraryPath = sys::Path::FindLibrary(*i);
if (!FullLibraryPath.isEmpty())
- Out2 << " -load=" << FullLibraryPath.str() << " \\\n";
+ Out2.os() << " -load=" << FullLibraryPath.str() << " \\\n";
}
- Out2 << " " << BitcodeOutputFilename << " ${1+\"$@\"}\n";
+ Out2.os() << " " << BitcodeOutputFilename << " ${1+\"$@\"}\n";
+ Out2.keep();
}
// BuildLinkItems -- This function generates a LinkItemList for the LinkItems
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index f7dad3da5f61..e55d0de0f9b5 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -116,19 +116,13 @@ int main(int argc, char **argv) {
if (DumpAsm) errs() << "Here's the assembly:\n" << *Composite;
std::string ErrorInfo;
- std::auto_ptr<raw_ostream>
- Out(new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary));
+ tool_output_file Out(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
if (!ErrorInfo.empty()) {
errs() << ErrorInfo << '\n';
return 1;
}
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT
- if (OutputFilename != "-")
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
if (verifyModule(*Composite)) {
errs() << argv[0] << ": linked module is broken!\n";
return 1;
@@ -136,9 +130,12 @@ int main(int argc, char **argv) {
if (Verbose) errs() << "Writing bitcode...\n";
if (OutputAssembly) {
- *Out << *Composite;
- } else if (Force || !CheckBitcodeOutputToConsole(*Out, true))
- WriteBitcodeToFile(Composite.get(), *Out);
+ Out.os() << *Composite;
+ } else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true))
+ WriteBitcodeToFile(Composite.get(), Out.os());
+
+ // Declare success.
+ Out.keep();
return 0;
}
diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt
index 8b61a4e8db15..805caf403a19 100644
--- a/tools/llvm-mc/CMakeLists.txt
+++ b/tools/llvm-mc/CMakeLists.txt
@@ -1,5 +1,4 @@
-set( LLVM_USED_LIBS EnhancedDisassembly)
-set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser)
+set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} support MC MCParser MCDisassembler)
add_llvm_tool(llvm-mc
llvm-mc.cpp
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index 37b2cb805631..13080b481f14 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -13,21 +13,21 @@
//===----------------------------------------------------------------------===//
#include "Disassembler.h"
-
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
+#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
+#include "../../lib/MC/MCDisassembler/EDInst.h"
+#include "../../lib/MC/MCDisassembler/EDOperand.h"
+#include "../../lib/MC/MCDisassembler/EDToken.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SourceMgr.h"
-
-#include "llvm-c/EnhancedDisassembly.h"
-
using namespace llvm;
typedef std::vector<std::pair<unsigned char, const char*> > ByteArrayTy;
@@ -53,7 +53,7 @@ public:
static bool PrintInsts(const MCDisassembler &DisAsm,
MCInstPrinter &Printer, const ByteArrayTy &Bytes,
- SourceMgr &SM) {
+ SourceMgr &SM, raw_ostream &Out) {
// Wrap the vector in a MemoryObject.
VectorMemoryObject memoryObject(Bytes);
@@ -66,8 +66,8 @@ static bool PrintInsts(const MCDisassembler &DisAsm,
if (DisAsm.getInstruction(Inst, Size, memoryObject, Index,
/*REMOVE*/ nulls())) {
- Printer.printInst(&Inst, outs());
- outs() << "\n";
+ Printer.printInst(&Inst, Out);
+ Out << "\n";
} else {
SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
"invalid instruction encoding", "warning");
@@ -127,7 +127,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
}
int Disassembler::disassemble(const Target &T, const std::string &Triple,
- MemoryBuffer &Buffer) {
+ MemoryBuffer &Buffer,
+ raw_ostream &Out) {
// Set up disassembler.
OwningPtr<const MCAsmInfo> AsmInfo(T.createAsmInfo(Triple));
@@ -162,7 +163,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
ErrorOccurred |= ByteArrayFromString(ByteArray, Str, SM);
if (!ByteArray.empty())
- ErrorOccurred |= PrintInsts(*DisAsm, *IP, ByteArray, SM);
+ ErrorOccurred |= PrintInsts(*DisAsm, *IP, ByteArray, SM, Out);
return ErrorOccurred;
}
@@ -179,26 +180,24 @@ static int byteArrayReader(uint8_t *B, uint64_t A, void *Arg) {
}
static int verboseEvaluator(uint64_t *V, unsigned R, void *Arg) {
- EDDisassemblerRef &disassembler = *((EDDisassemblerRef*)Arg);
+ EDDisassembler &disassembler = *(EDDisassembler *)((void **)Arg)[0];
+ raw_ostream &Out = *(raw_ostream *)((void **)Arg)[1];
- const char *regName;
+ if (const char *regName = disassembler.nameWithRegisterID(R))
+ Out << "[" << regName << "/" << R << "]";
- if (!EDGetRegisterName(&regName,
- disassembler,
- R))
- outs() << "[" << regName << "/" << R << "]";
- if (EDRegisterIsStackPointer(disassembler, R))
- outs() << "(sp)";
- if (EDRegisterIsProgramCounter(disassembler, R))
- outs() << "(pc)";
+ if (disassembler.registerIsStackPointer(R))
+ Out << "(sp)";
+ if (disassembler.registerIsProgramCounter(R))
+ Out << "(pc)";
*V = 0;
-
return 0;
}
int Disassembler::disassembleEnhanced(const std::string &TS,
- MemoryBuffer &Buffer) {
+ MemoryBuffer &Buffer,
+ raw_ostream &Out) {
ByteArrayTy ByteArray;
StringRef Str = Buffer.getBuffer();
SourceMgr SM;
@@ -209,10 +208,8 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
return -1;
}
- EDDisassemblerRef disassembler;
-
Triple T(TS);
- EDAssemblySyntax_t AS;
+ EDDisassembler::AssemblySyntax AS;
switch (T.getArch()) {
default:
@@ -220,140 +217,121 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
return -1;
case Triple::arm:
case Triple::thumb:
- AS = kEDAssemblySyntaxARMUAL;
+ AS = EDDisassembler::kEDAssemblySyntaxARMUAL;
break;
case Triple::x86:
case Triple::x86_64:
- AS = kEDAssemblySyntaxX86ATT;
+ AS = EDDisassembler::kEDAssemblySyntaxX86ATT;
break;
}
- if (EDGetDisassembler(&disassembler,
- TS.c_str(),
- AS)) {
- errs() << "error: couldn't get disassembler for " << TS.c_str() << "\n";
+ EDDisassembler::initialize();
+ EDDisassembler *disassembler =
+ EDDisassembler::getDisassembler(TS.c_str(), AS);
+
+ if (disassembler == 0) {
+ errs() << "error: couldn't get disassembler for " << TS << '\n';
return -1;
}
- EDInstRef inst;
-
- if (EDCreateInsts(&inst, 1, disassembler, byteArrayReader, 0,&ByteArray)
- != 1) {
+ EDInst *inst =
+ disassembler->createInst(byteArrayReader, 0, &ByteArray);
+
+ if (inst == 0) {
errs() << "error: Didn't get an instruction\n";
return -1;
}
- int numTokens = EDNumTokens(inst);
-
- if (numTokens < 0) {
- errs() << "error: Couldn't count the instruction's tokens\n";
+ unsigned numTokens = inst->numTokens();
+ if ((int)numTokens < 0) {
+ errs() << "error: couldn't count the instruction's tokens\n";
return -1;
}
- int tokenIndex;
-
- for (tokenIndex = 0; tokenIndex < numTokens; ++tokenIndex) {
- EDTokenRef token;
+ for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) {
+ EDToken *token;
- if (EDGetToken(&token, inst, tokenIndex)) {
+ if (inst->getToken(token, tokenIndex)) {
errs() << "error: Couldn't get token\n";
return -1;
}
const char *buf;
-
- if (EDGetTokenString(&buf, token)) {
+ if (token->getString(buf)) {
errs() << "error: Couldn't get string for token\n";
return -1;
}
- outs() << "[";
-
- int operandIndex = EDOperandIndexForToken(token);
+ Out << '[';
+ int operandIndex = token->operandID();
if (operandIndex >= 0)
- outs() << operandIndex << "-";
+ Out << operandIndex << "-";
- if (EDTokenIsWhitespace(token)) {
- outs() << "w";
- } else if (EDTokenIsPunctuation(token)) {
- outs() << "p";
- } else if (EDTokenIsOpcode(token)) {
- outs() << "o";
- } else if (EDTokenIsLiteral(token)) {
- outs() << "l";
- } else if (EDTokenIsRegister(token)) {
- outs() << "r";
- } else {
- outs() << "?";
+ switch (token->type()) {
+ default: Out << "?"; break;
+ case EDToken::kTokenWhitespace: Out << "w"; break;
+ case EDToken::kTokenPunctuation: Out << "p"; break;
+ case EDToken::kTokenOpcode: Out << "o"; break;
+ case EDToken::kTokenLiteral: Out << "l"; break;
+ case EDToken::kTokenRegister: Out << "r"; break;
}
- outs() << ":" << buf;
+ Out << ":" << buf;
- if (EDTokenIsLiteral(token)) {
- outs() << "=";
- if (EDTokenIsNegativeLiteral(token))
- outs() << "-";
+ if (token->type() == EDToken::kTokenLiteral) {
+ Out << "=";
+ if (token->literalSign())
+ Out << "-";
uint64_t absoluteValue;
- if (EDLiteralTokenAbsoluteValue(&absoluteValue, token)) {
+ if (token->literalAbsoluteValue(absoluteValue)) {
errs() << "error: Couldn't get the value of a literal token\n";
return -1;
}
- outs() << absoluteValue;
- } else if (EDTokenIsRegister(token)) {
- outs() << "=";
+ Out << absoluteValue;
+ } else if (token->type() == EDToken::kTokenRegister) {
+ Out << "=";
unsigned regID;
- if (EDRegisterTokenValue(&regID, token)) {
+ if (token->registerID(regID)) {
errs() << "error: Couldn't get the ID of a register token\n";
return -1;
}
- outs() << "r" << regID;
+ Out << "r" << regID;
}
- outs() << "]";
+ Out << "]";
}
- outs() << " ";
+ Out << " ";
- if (EDInstIsBranch(inst))
- outs() << "<br> ";
- if (EDInstIsMove(inst))
- outs() << "<mov> ";
+ if (inst->isBranch())
+ Out << "<br> ";
+ if (inst->isMove())
+ Out << "<mov> ";
- int numOperands = EDNumOperands(inst);
+ unsigned numOperands = inst->numOperands();
- if (numOperands < 0) {
+ if ((int)numOperands < 0) {
errs() << "error: Couldn't count operands\n";
return -1;
}
- int operandIndex;
-
- for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
- outs() << operandIndex << ":";
-
- EDOperandRef operand;
+ for (unsigned operandIndex = 0; operandIndex != numOperands; ++operandIndex) {
+ Out << operandIndex << ":";
- if (EDGetOperand(&operand,
- inst,
- operandIndex)) {
- errs() << "error: Couldn't get operand\n";
+ EDOperand *operand;
+ if (inst->getOperand(operand, operandIndex)) {
+ errs() << "error: couldn't get operand\n";
return -1;
}
uint64_t evaluatedResult;
-
- EDEvaluateOperand(&evaluatedResult,
- operand,
- verboseEvaluator,
- &disassembler);
-
- outs() << "=" << evaluatedResult;
-
- outs() << " ";
+ void *Arg[] = { disassembler, &Out };
+ evaluatedResult = operand->evaluate(evaluatedResult, verboseEvaluator, Arg);
+ Out << "=" << evaluatedResult << " ";
}
- outs() << "\n";
+ Out << '\n';
return 0;
}
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index 3da23965bdb1..b56f2e954555 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -21,15 +21,18 @@ namespace llvm {
class Target;
class MemoryBuffer;
+class raw_ostream;
class Disassembler {
public:
static int disassemble(const Target &target,
const std::string &tripleString,
- MemoryBuffer &buffer);
+ MemoryBuffer &buffer,
+ raw_ostream &Out);
static int disassembleEnhanced(const std::string &tripleString,
- MemoryBuffer &buffer);
+ MemoryBuffer &buffer,
+ raw_ostream &Out);
};
} // namespace llvm
diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile
index a12749324956..934a6e4dd081 100644
--- a/tools/llvm-mc/Makefile
+++ b/tools/llvm-mc/Makefile
@@ -18,9 +18,7 @@ TOOL_NO_EXPORTS = 1
# early so we can set up LINK_COMPONENTS before including Makefile.rules
include $(LEVEL)/Makefile.config
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCParser MC support
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCDisassembler MCParser MC support
include $(LLVM_SRC_ROOT)/Makefile.rules
-# Using LIBS instead of USEDLIBS to force static linking
-LIBS += $(LLVMLibDir)/libEnhancedDisassembly.a
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index fc8a1c5523ae..aef0a3dffa45 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -12,13 +12,13 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/Target/TargetAsmBackend.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetData.h"
@@ -27,6 +27,7 @@
#include "llvm/Target/TargetSelect.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -51,6 +52,10 @@ ShowEncoding("show-encoding", cl::desc("Show instruction encodings"));
static cl::opt<bool>
ShowInst("show-inst", cl::desc("Show internal instruction representation"));
+static cl::opt<bool>
+ShowInstOperands("show-inst-operands",
+ cl::desc("Show instructions operands as parsed"));
+
static cl::opt<unsigned>
OutputAsmVariant("output-asm-variant",
cl::desc("Syntax variant to use for output printing"));
@@ -135,6 +140,22 @@ static const Target *GetTarget(const char *ProgName) {
return 0;
}
+static tool_output_file *GetOutputStream() {
+ if (OutputFilename == "")
+ OutputFilename = "-";
+
+ std::string Err;
+ tool_output_file *Out = new tool_output_file(OutputFilename.c_str(), Err,
+ raw_fd_ostream::F_Binary);
+ if (!Err.empty()) {
+ errs() << Err << '\n';
+ delete Out;
+ return 0;
+ }
+
+ return Out;
+}
+
static int AsLexInput(const char *ProgName) {
std::string ErrorMessage;
MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename,
@@ -165,9 +186,13 @@ static int AsLexInput(const char *ProgName) {
assert(MAI && "Unable to create target asm info!");
AsmLexer Lexer(*MAI);
-
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(0));
+
+ OwningPtr<tool_output_file> Out(GetOutputStream());
+ if (!Out)
+ return 1;
+
bool Error = false;
-
while (Lexer.Lex().isNot(AsmToken::Eof)) {
switch (Lexer.getKind()) {
default:
@@ -178,69 +203,51 @@ static int AsLexInput(const char *ProgName) {
Error = true; // error already printed.
break;
case AsmToken::Identifier:
- outs() << "identifier: " << Lexer.getTok().getString() << '\n';
+ Out->os() << "identifier: " << Lexer.getTok().getString() << '\n';
break;
case AsmToken::String:
- outs() << "string: " << Lexer.getTok().getString() << '\n';
+ Out->os() << "string: " << Lexer.getTok().getString() << '\n';
break;
case AsmToken::Integer:
- outs() << "int: " << Lexer.getTok().getString() << '\n';
+ Out->os() << "int: " << Lexer.getTok().getString() << '\n';
break;
- case AsmToken::Amp: outs() << "Amp\n"; break;
- case AsmToken::AmpAmp: outs() << "AmpAmp\n"; break;
- case AsmToken::Caret: outs() << "Caret\n"; break;
- case AsmToken::Colon: outs() << "Colon\n"; break;
- case AsmToken::Comma: outs() << "Comma\n"; break;
- case AsmToken::Dollar: outs() << "Dollar\n"; break;
- case AsmToken::EndOfStatement: outs() << "EndOfStatement\n"; break;
- case AsmToken::Eof: outs() << "Eof\n"; break;
- case AsmToken::Equal: outs() << "Equal\n"; break;
- case AsmToken::EqualEqual: outs() << "EqualEqual\n"; break;
- case AsmToken::Exclaim: outs() << "Exclaim\n"; break;
- case AsmToken::ExclaimEqual: outs() << "ExclaimEqual\n"; break;
- case AsmToken::Greater: outs() << "Greater\n"; break;
- case AsmToken::GreaterEqual: outs() << "GreaterEqual\n"; break;
- case AsmToken::GreaterGreater: outs() << "GreaterGreater\n"; break;
- case AsmToken::LParen: outs() << "LParen\n"; break;
- case AsmToken::Less: outs() << "Less\n"; break;
- case AsmToken::LessEqual: outs() << "LessEqual\n"; break;
- case AsmToken::LessGreater: outs() << "LessGreater\n"; break;
- case AsmToken::LessLess: outs() << "LessLess\n"; break;
- case AsmToken::Minus: outs() << "Minus\n"; break;
- case AsmToken::Percent: outs() << "Percent\n"; break;
- case AsmToken::Pipe: outs() << "Pipe\n"; break;
- case AsmToken::PipePipe: outs() << "PipePipe\n"; break;
- case AsmToken::Plus: outs() << "Plus\n"; break;
- case AsmToken::RParen: outs() << "RParen\n"; break;
- case AsmToken::Slash: outs() << "Slash\n"; break;
- case AsmToken::Star: outs() << "Star\n"; break;
- case AsmToken::Tilde: outs() << "Tilde\n"; break;
+ case AsmToken::Amp: Out->os() << "Amp\n"; break;
+ case AsmToken::AmpAmp: Out->os() << "AmpAmp\n"; break;
+ case AsmToken::Caret: Out->os() << "Caret\n"; break;
+ case AsmToken::Colon: Out->os() << "Colon\n"; break;
+ case AsmToken::Comma: Out->os() << "Comma\n"; break;
+ case AsmToken::Dollar: Out->os() << "Dollar\n"; break;
+ case AsmToken::EndOfStatement: Out->os() << "EndOfStatement\n"; break;
+ case AsmToken::Eof: Out->os() << "Eof\n"; break;
+ case AsmToken::Equal: Out->os() << "Equal\n"; break;
+ case AsmToken::EqualEqual: Out->os() << "EqualEqual\n"; break;
+ case AsmToken::Exclaim: Out->os() << "Exclaim\n"; break;
+ case AsmToken::ExclaimEqual: Out->os() << "ExclaimEqual\n"; break;
+ case AsmToken::Greater: Out->os() << "Greater\n"; break;
+ case AsmToken::GreaterEqual: Out->os() << "GreaterEqual\n"; break;
+ case AsmToken::GreaterGreater: Out->os() << "GreaterGreater\n"; break;
+ case AsmToken::LParen: Out->os() << "LParen\n"; break;
+ case AsmToken::Less: Out->os() << "Less\n"; break;
+ case AsmToken::LessEqual: Out->os() << "LessEqual\n"; break;
+ case AsmToken::LessGreater: Out->os() << "LessGreater\n"; break;
+ case AsmToken::LessLess: Out->os() << "LessLess\n"; break;
+ case AsmToken::Minus: Out->os() << "Minus\n"; break;
+ case AsmToken::Percent: Out->os() << "Percent\n"; break;
+ case AsmToken::Pipe: Out->os() << "Pipe\n"; break;
+ case AsmToken::PipePipe: Out->os() << "PipePipe\n"; break;
+ case AsmToken::Plus: Out->os() << "Plus\n"; break;
+ case AsmToken::RParen: Out->os() << "RParen\n"; break;
+ case AsmToken::Slash: Out->os() << "Slash\n"; break;
+ case AsmToken::Star: Out->os() << "Star\n"; break;
+ case AsmToken::Tilde: Out->os() << "Tilde\n"; break;
}
}
-
- return Error;
-}
-static formatted_raw_ostream *GetOutputStream() {
- if (OutputFilename == "")
- OutputFilename = "-";
-
- // Make sure that the Out file gets unlinked from the disk if we get a
- // SIGINT.
- if (OutputFilename != "-")
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
- std::string Err;
- raw_fd_ostream *Out = new raw_fd_ostream(OutputFilename.c_str(), Err,
- raw_fd_ostream::F_Binary);
- if (!Err.empty()) {
- errs() << Err << '\n';
- delete Out;
- return 0;
- }
-
- return new formatted_raw_ostream(*Out, formatted_raw_ostream::DELETE_STREAM);
+ // Keep output if no errors.
+ if (Error == 0) Out->keep();
+
+ return Error;
}
static int AssembleInput(const char *ProgName) {
@@ -273,10 +280,6 @@ static int AssembleInput(const char *ProgName) {
assert(MAI && "Unable to create target asm info!");
MCContext Ctx(*MAI);
- formatted_raw_ostream *Out = GetOutputStream();
- if (!Out)
- return 1;
-
// FIXME: We shouldn't need to do this (and link in codegen).
OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName, ""));
@@ -287,47 +290,52 @@ static int AssembleInput(const char *ProgName) {
return 1;
}
- OwningPtr<MCCodeEmitter> CE;
+ OwningPtr<tool_output_file> Out(GetOutputStream());
+ if (!Out)
+ return 1;
+
+ formatted_raw_ostream FOS(Out->os());
OwningPtr<MCStreamer> Str;
- OwningPtr<TargetAsmBackend> TAB;
if (FileType == OFT_AssemblyFile) {
MCInstPrinter *IP =
TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI);
+ MCCodeEmitter *CE = 0;
if (ShowEncoding)
- CE.reset(TheTarget->createCodeEmitter(*TM, Ctx));
- Str.reset(createAsmStreamer(Ctx, *Out,TM->getTargetData()->isLittleEndian(),
- /*asmverbose*/true, IP, CE.get(), ShowInst));
+ CE = TheTarget->createCodeEmitter(*TM, Ctx);
+ Str.reset(createAsmStreamer(Ctx, FOS,
+ TM->getTargetData()->isLittleEndian(),
+ /*asmverbose*/true, IP, CE, ShowInst));
} else if (FileType == OFT_Null) {
Str.reset(createNullStreamer(Ctx));
} else {
assert(FileType == OFT_ObjectFile && "Invalid file type!");
- CE.reset(TheTarget->createCodeEmitter(*TM, Ctx));
- TAB.reset(TheTarget->createAsmBackend(TripleName));
+ MCCodeEmitter *CE = TheTarget->createCodeEmitter(*TM, Ctx);
+ TargetAsmBackend *TAB = TheTarget->createAsmBackend(TripleName);
Str.reset(TheTarget->createObjectStreamer(TripleName, Ctx, *TAB,
- *Out, CE.get(), RelaxAll));
+ FOS, CE, RelaxAll));
}
if (EnableLogging) {
Str.reset(createLoggingStreamer(Str.take(), errs()));
}
- AsmParser Parser(*TheTarget, SrcMgr, Ctx, *Str.get(), *MAI);
- OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(Parser));
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(*TheTarget, SrcMgr, Ctx,
+ *Str.get(), *MAI));
+ OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(*Parser, *TM));
if (!TAP) {
errs() << ProgName
<< ": error: this target does not support assembly parsing.\n";
return 1;
}
- Parser.setTargetParser(*TAP.get());
+ Parser->setShowParsedOperands(ShowInstOperands);
+ Parser->setTargetParser(*TAP.get());
- int Res = Parser.Run(NoInitialTextSection);
- delete Out;
+ int Res = Parser->Run(NoInitialTextSection);
- // Delete output on errors.
- if (Res && OutputFilename != "-")
- sys::Path(OutputFilename).eraseFromDisk();
+ // Keep output if no errors.
+ if (Res == 0) Out->keep();
return Res;
}
@@ -351,10 +359,20 @@ static int DisassembleInput(const char *ProgName, bool Enhanced) {
return 1;
}
+ OwningPtr<tool_output_file> Out(GetOutputStream());
+ if (!Out)
+ return 1;
+
+ int Res;
if (Enhanced)
- return Disassembler::disassembleEnhanced(TripleName, *Buffer);
+ Res = Disassembler::disassembleEnhanced(TripleName, *Buffer, Out->os());
else
- return Disassembler::disassemble(*TheTarget, TripleName, *Buffer);
+ Res = Disassembler::disassemble(*TheTarget, TripleName, *Buffer, Out->os());
+
+ // Keep output if no errors.
+ if (Res == 0) Out->keep();
+
+ return Res;
}
@@ -373,6 +391,7 @@ int main(int argc, char **argv) {
llvm::InitializeAllDisassemblers();
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
+ TripleName = Triple::normalize(TripleName);
switch (Action) {
default:
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index fd7e7f670b95..daa85712379a 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -88,10 +88,13 @@ static char TypeCharForSymbol(GlobalValue &GV) {
static void DumpSymbolNameForGlobalValue(GlobalValue &GV) {
// Private linkage and available_externally linkage don't exist in symtab.
- if (GV.hasPrivateLinkage() || GV.hasLinkerPrivateLinkage() ||
- GV.hasLinkerPrivateWeakLinkage() || GV.hasAvailableExternallyLinkage())
+ if (GV.hasPrivateLinkage() ||
+ GV.hasLinkerPrivateLinkage() ||
+ GV.hasLinkerPrivateWeakLinkage() ||
+ GV.hasLinkerPrivateWeakDefAutoLinkage() ||
+ GV.hasAvailableExternallyLinkage())
return;
-
+
const std::string SymbolAddrStr = " "; // Not used yet...
char TypeChar = TypeCharForSymbol(GV);
if ((TypeChar != 'U') && UndefinedOnly)
@@ -145,13 +148,13 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
Module *Result = 0;
if (Buffer.get())
Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
-
+
if (Result) {
DumpSymbolNamesFromModule(Result);
delete Result;
} else
errs() << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
-
+
} else if (aPath.isArchive()) {
std::string ErrMsg;
Archive* archive = Archive::OpenAndLoad(sys::Path(Filename), Context,
@@ -176,7 +179,7 @@ int main(int argc, char **argv) {
// Print a stack trace if we signal out.
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
-
+
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm symbol table dumper\n");
diff --git a/tools/llvm-prof/llvm-prof.cpp b/tools/llvm-prof/llvm-prof.cpp
index 88adeb43e6de..1c63d974eae5 100644
--- a/tools/llvm-prof/llvm-prof.cpp
+++ b/tools/llvm-prof/llvm-prof.cpp
@@ -17,12 +17,13 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ProfileInfoLoader.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -75,9 +76,10 @@ namespace {
class ProfileAnnotator : public AssemblyAnnotationWriter {
ProfileInfo &PI;
public:
- ProfileAnnotator(ProfileInfo& pi) : PI(pi) {}
+ ProfileAnnotator(ProfileInfo &pi) : PI(pi) {}
- virtual void emitFunctionAnnot(const Function *F, raw_ostream &OS) {
+ virtual void emitFunctionAnnot(const Function *F,
+ formatted_raw_ostream &OS) {
double w = PI.getExecutionCount(F);
if (w != ProfileInfo::MissingValue) {
OS << ";;; %" << F->getName() << " called "<<(unsigned)w
@@ -85,7 +87,7 @@ namespace {
}
}
virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
- raw_ostream &OS) {
+ formatted_raw_ostream &OS) {
double w = PI.getExecutionCount(BB);
if (w != ProfileInfo::MissingValue) {
if (w != 0) {
@@ -96,7 +98,8 @@ namespace {
}
}
- virtual void emitBasicBlockEndAnnot(const BasicBlock *BB, raw_ostream &OS) {
+ virtual void emitBasicBlockEndAnnot(const BasicBlock *BB,
+ formatted_raw_ostream &OS) {
// Figure out how many times each successor executed.
std::vector<std::pair<ProfileInfo::Edge, double> > SuccCounts;
@@ -128,7 +131,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo.
explicit ProfileInfoPrinterPass(ProfileInfoLoader &_PIL)
- : ModulePass(&ID), PIL(_PIL) {}
+ : ModulePass(ID), PIL(_PIL) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile
index ce29bf809e63..52381304bb93 100644
--- a/tools/llvm-shlib/Makefile
+++ b/tools/llvm-shlib/Makefile
@@ -15,6 +15,17 @@ NO_BUILD_ARCHIVE = 1
LINK_LIBS_IN_SHARED = 1
SHARED_LIBRARY = 1
+include $(LEVEL)/Makefile.config
+
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ EXPORTED_SYMBOL_FILE = $(ObjDir)/$(LIBRARYNAME).exports
+
+ # It is needed to force static-stdc++.a linked.
+ # FIXME: It should be omitted when configure detects system's stdc++.dll.
+ SHLIB_FRAG_NAMES += stdc++.a.o
+
+endif
+
include $(LEVEL)/Makefile.common
# Include all archives in libLLVM.(so|dylib) except the ones that have
@@ -38,7 +49,6 @@ ifeq ($(HOST_OS),Darwin)
LLVMLibsOptions := $(LLVMLibsOptions) -all_load
# extra options to override libtool defaults
LLVMLibsOptions := $(LLVMLibsOptions) \
- -avoid-version \
-Wl,-dead_strip \
-Wl,-seg1addr -Wl,0xE0000000
@@ -58,3 +68,44 @@ ifeq ($(HOST_OS), Linux)
# Don't allow unresolved symbols.
LLVMLibsOptions += -Wl,--no-undefined
endif
+
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+
+SHLIB_STUBS := $(addprefix $(ObjDir)/, $(SHLIB_FRAG_NAMES))
+SHLIB_FRAGS := $(patsubst %.a.o, $(ObjDir)/%.syms.txt, $(LIBRARYNAME).a.o $(SHLIB_FRAG_NAMES))
+LLVMLibsOptions := $(SHLIB_STUBS) $(LLVMLibsOptions)
+
+$(LibName.SO): $(SHLIB_STUBS)
+
+%.syms.txt: %.a.o
+ $(Echo) Collecting global symbols of $(notdir $*)
+ $(Verb) $(NM_PATH) -g $< > $@
+
+$(ObjDir)/$(LIBRARYNAME).exports: $(SHLIB_FRAGS) $(ObjDir)/.dir
+ $(Echo) Generating exports for $(LIBRARYNAME)
+ $(Verb) ($(SED) -n \
+ -e "s/^.* T _\([^.][^.]*\)$$/\1/p" \
+ -e "s/^.* [BDR] _\([^.][^.]*\)$$/\1 DATA/p" \
+ $(SHLIB_FRAGS) \
+ | sort -u) > $@
+
+$(ObjDir)/$(LIBRARYNAME).a.o: $(LLVMLibsPaths) $(ObjDir)/.dir
+ $(Echo) Linking all LLVMLibs together for $(LIBRARYNAME)
+ $(Verb) $(Link) -nostartfiles -Wl,-r -nodefaultlibs -o $@ \
+ -Wl,--whole-archive $(LLVMLibsPaths) \
+ -Wl,--no-whole-archive
+
+$(ObjDir)/stdc++.a.o: $(ObjDir)/.dir
+ $(Echo) Linking all libs together for static libstdc++.a
+ $(Verb) $(Link) -nostartfiles -Wl,-r -nodefaultlibs -o $@ \
+ -Wl,--whole-archive -lstdc++ \
+ -Wl,--no-whole-archive
+# FIXME: workaround to invalidate -lstdc++
+ $(Echo) Making dummy -lstdc++ to lib
+ $(Verb) $(AR) rc $(ToolDir)/libstdc++.dll.a
+# FIXME: Is install-local needed?
+
+clean-local::
+ $(Verb) $(RM) -f $(ToolDir)/libstdc++.dll.a
+
+endif
diff --git a/tools/llvmc/CMakeLists.txt b/tools/llvmc/CMakeLists.txt
index bebaaebca8d6..10ad5d82009f 100644
--- a/tools/llvmc/CMakeLists.txt
+++ b/tools/llvmc/CMakeLists.txt
@@ -1,4 +1,4 @@
-# add_subdirectory(driver)
+# add_subdirectory(src)
# TODO: support plugins and user-configured builds.
# See ./doc/LLVMC-Reference.rst "Customizing LLVMC: the compilation graph"
diff --git a/tools/llvmc/Makefile b/tools/llvmc/Makefile
index 8f995265d2cf..7c03e2a74f7c 100644
--- a/tools/llvmc/Makefile
+++ b/tools/llvmc/Makefile
@@ -9,10 +9,10 @@
LEVEL = ../..
-export LLVMC_BASED_DRIVER_NAME = llvmc
-export LLVMC_BUILTIN_PLUGINS = Base Clang
-REQUIRES_RTTI = 1
+DIRS = src
-DIRS = plugins driver
+ifeq ($(BUILD_EXAMPLES),1)
+ OPTIONAL_DIRS += examples
+endif
include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index ca8500d615d7..d160e758ea62 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -299,7 +299,7 @@ separate option groups syntactically.
* Possible option types:
- ``switch_option`` - a simple boolean switch without arguments, for example
- ``-O2`` or ``-time``. At most one occurrence is allowed.
+ ``-O2`` or ``-time``. At most one occurrence is allowed by default.
- ``parameter_option`` - option that takes one argument, for example
``-std=c99``. It is also allowed to use spaces instead of the equality
@@ -321,6 +321,13 @@ separate option groups syntactically.
option types, aliases are not allowed to have any properties besides the
aliased option name. Usage example: ``(alias_option "preprocess", "E")``
+ - ``switch_list_option`` - like ``switch_option`` with the ``zero_or_more``
+ property, but remembers how many times the switch was turned on. Useful
+ mostly for forwarding. Example: when ``-foo`` is a switch option (with the
+ ``zero_or_more`` property), the command ``driver -foo -foo`` is forwarded
+ as ``some-tool -foo``, but when ``-foo`` is a switch list, the same command
+ is forwarded as ``some-tool -foo -foo``.
+
* Possible option properties:
diff --git a/tools/llvmc/example/Hello/Hello.cpp b/tools/llvmc/example/Hello/Hello.cpp
deleted file mode 100644
index a7179eae8bd1..000000000000
--- a/tools/llvmc/example/Hello/Hello.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- Hello.cpp - Example code from "Writing an LLVMC Plugin" ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Test plugin for LLVMC. Shows how to write plugins without using TableGen.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/CompilationGraph.h"
-#include "llvm/CompilerDriver/Plugin.h"
-#include "llvm/Support/raw_ostream.h"
-
-namespace {
-struct MyPlugin : public llvmc::BasePlugin {
-
- void PreprocessOptions() const
- {}
-
- void PopulateLanguageMap(llvmc::LanguageMap&) const
- { outs() << "Hello!\n"; }
-
- void PopulateCompilationGraph(llvmc::CompilationGraph&) const
- {}
-};
-
-static llvmc::RegisterPlugin<MyPlugin> RP("Hello", "Hello World plugin");
-
-}
diff --git a/tools/llvmc/example/Simple/PluginMain.cpp b/tools/llvmc/example/Simple/PluginMain.cpp
deleted file mode 100644
index add8acb4a574..000000000000
--- a/tools/llvmc/example/Simple/PluginMain.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "AutoGenerated.inc"
diff --git a/tools/llvmc/example/Skeleton/Makefile b/tools/llvmc/example/Skeleton/Makefile
deleted file mode 100644
index f489abfbc574..000000000000
--- a/tools/llvmc/example/Skeleton/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-##===- llvmc/example/Skeleton/Makefile ---------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-# Change this so that $(BASE_LEVEL)/Makefile.common refers to
-# $LLVM_DIR/Makefile.common or $YOUR_LLVM_BASED_PROJECT/Makefile.common.
-export LLVMC_BASE_LEVEL = ../../../..
-
-# Change this to the name of your LLVMC-based driver.
-export LLVMC_BASED_DRIVER_NAME = llvmc-skeleton
-
-# List your plugin names here
-export LLVMC_BUILTIN_PLUGINS = # Plugin
-
-LEVEL = $(LLVMC_BASE_LEVEL)
-
-DIRS = plugins driver
-
-include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/Skeleton/driver/Makefile b/tools/llvmc/example/Skeleton/driver/Makefile
deleted file mode 100644
index 93e795b198a6..000000000000
--- a/tools/llvmc/example/Skeleton/driver/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-##===- llvmc/example/Skeleton/driver/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = $(LLVMC_BASE_LEVEL)/..
-LLVMC_BASED_DRIVER = $(LLVMC_BASED_DRIVER_NAME)
-
-include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/Skeleton/plugins/Makefile b/tools/llvmc/example/Skeleton/plugins/Makefile
deleted file mode 100644
index fb07f23d58e5..000000000000
--- a/tools/llvmc/example/Skeleton/plugins/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-##===- llvmc/example/Skeleton/plugins/Makefile -------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = $(LLVMC_BASE_LEVEL)/..
-
-ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-DIRS = $(LLVMC_BUILTIN_PLUGINS)
-endif
-
-export LLVMC_BUILTIN_PLUGIN=1
-
-include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/Skeleton/plugins/Plugin/PluginMain.cpp b/tools/llvmc/example/Skeleton/plugins/Plugin/PluginMain.cpp
deleted file mode 100644
index add8acb4a574..000000000000
--- a/tools/llvmc/example/Skeleton/plugins/Plugin/PluginMain.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "AutoGenerated.inc"
diff --git a/tools/llvmc/example/mcc16/Makefile b/tools/llvmc/example/mcc16/Makefile
deleted file mode 100644
index e94bca2330dd..000000000000
--- a/tools/llvmc/example/mcc16/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-##===- llvmc/example/mcc16/Makefile ------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-export LLVMC_BASE_LEVEL = ../../../..
-export LLVMC_BASED_DRIVER_NAME = mcc16
-export LLVMC_BUILTIN_PLUGINS = PIC16Base
-
-LEVEL = $(LLVMC_BASE_LEVEL)
-
-DIRS = plugins driver
-
-include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/mcc16/driver/Makefile b/tools/llvmc/example/mcc16/driver/Makefile
deleted file mode 100644
index 670d8bdf94c0..000000000000
--- a/tools/llvmc/example/mcc16/driver/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-##===- llvmc/example/mcc16/driver/Makefile -----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = $(LLVMC_BASE_LEVEL)/..
-LLVMC_BASED_DRIVER = $(LLVMC_BASED_DRIVER_NAME)
-
-include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/mcc16/plugins/Makefile b/tools/llvmc/example/mcc16/plugins/Makefile
deleted file mode 100644
index fb07f23d58e5..000000000000
--- a/tools/llvmc/example/mcc16/plugins/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-##===- llvmc/example/Skeleton/plugins/Makefile -------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = $(LLVMC_BASE_LEVEL)/..
-
-ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-DIRS = $(LLVMC_BUILTIN_PLUGINS)
-endif
-
-export LLVMC_BUILTIN_PLUGIN=1
-
-include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/examples/Hello/Hello.cpp b/tools/llvmc/examples/Hello/Hello.cpp
new file mode 100644
index 000000000000..71f04fdd49d7
--- /dev/null
+++ b/tools/llvmc/examples/Hello/Hello.cpp
@@ -0,0 +1,29 @@
+//===- Hello.cpp - Example code from "Writing an LLVMC Plugin" ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shows how to write llvmc-based drivers without using TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/AutoGenerated.h"
+#include "llvm/CompilerDriver/Main.inc"
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvmc {
+namespace autogenerated {
+
+int PreprocessOptions () { return 0; }
+
+int PopulateLanguageMap (LanguageMap&) { llvm::outs() << "Hello!\n"; return 0; }
+
+int PopulateCompilationGraph (CompilationGraph&) { return 0; }
+
+}
+}
diff --git a/tools/llvmc/example/Hello/Makefile b/tools/llvmc/examples/Hello/Makefile
index 10325e6a6d5f..c281be655dda 100644
--- a/tools/llvmc/example/Hello/Makefile
+++ b/tools/llvmc/examples/Hello/Makefile
@@ -1,4 +1,4 @@
-##===- tools/llvmc/plugins/Hello/Makefile ------------------*- Makefile -*-===##
+##===- tools/llvmc/examples/Hello/Makefile -----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -9,6 +9,6 @@
LEVEL = ../../../..
-LLVMC_PLUGIN = Hello
+LLVMC_BASED_DRIVER = Hello
include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/Simple/Makefile b/tools/llvmc/examples/Makefile
index d7adb5d851e5..8468e9310442 100644
--- a/tools/llvmc/example/Simple/Makefile
+++ b/tools/llvmc/examples/Makefile
@@ -1,4 +1,4 @@
-##===- tools/llvmc/plugins/Simple/Makefile -----------------*- Makefile -*-===##
+##===- tools/llvmc/examples/Makefile -----------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,9 +7,8 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
+LEVEL=../../..
-LLVMC_PLUGIN = Simple
-BUILT_SOURCES = AutoGenerated.inc
+PARALLEL_DIRS := Hello Simple mcc16 Skeleton
include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/plugins/Clang/Makefile b/tools/llvmc/examples/Simple/Makefile
index 5e5b88a44b22..c10387c4f556 100644
--- a/tools/llvmc/plugins/Clang/Makefile
+++ b/tools/llvmc/examples/Simple/Makefile
@@ -1,4 +1,4 @@
-##===- tools/llvmc/plugins/Clang/Makefile ------------------*- Makefile -*-===##
+##===- llvmc/examples/Simple/Makefile ----------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -9,7 +9,7 @@
LEVEL = ../../../..
-LLVMC_PLUGIN = Clang
-BUILT_SOURCES = AutoGenerated.inc
+LLVMC_BASED_DRIVER = Simple
+BUILT_SOURCES = Simple.inc
include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/examples/Simple/Simple.cpp b/tools/llvmc/examples/Simple/Simple.cpp
new file mode 100644
index 000000000000..8ac73130f9eb
--- /dev/null
+++ b/tools/llvmc/examples/Simple/Simple.cpp
@@ -0,0 +1,2 @@
+#include "llvm/CompilerDriver/Main.inc"
+#include "Simple.inc"
diff --git a/tools/llvmc/example/Simple/Simple.td b/tools/llvmc/examples/Simple/Simple.td
index 87bc385b7a70..b47483b5d382 100644
--- a/tools/llvmc/example/Simple/Simple.td
+++ b/tools/llvmc/examples/Simple/Simple.td
@@ -1,4 +1,4 @@
-//===- Simple.td - A simple plugin for LLVMC ------------------------------===//
+//===- Simple.td - A simple LLVMC-based driver ----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,19 +7,19 @@
//
//===----------------------------------------------------------------------===//
//
-// A simple LLVMC-based gcc wrapper that shows how to write LLVMC plugins.
+// A simple LLVMC-based gcc wrapper.
//
// To compile, use this command:
//
-// $ cd $LLVMC_DIR/example/Simple
-// $ make
+// $ cd $LLVM_OBJ_DIR/tools/llvmc
+// $ make BUILD_EXAMPLES=1
//
// Run as:
//
-// $ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
+// $ $LLVM_OBJ_DIR/$(BuildMode)/bin/Simple
//
// For instructions on how to build your own LLVMC-based driver, see
-// the 'example/Skeleton' directory.
+// the 'examples/Skeleton' directory.
//===----------------------------------------------------------------------===//
include "llvm/CompilerDriver/Common.td"
@@ -28,10 +28,14 @@ def gcc : Tool<
[(in_language "c"),
(out_language "executable"),
(output_suffix "out"),
- (cmd_line "gcc $INFILE -o $OUTFILE"),
- (sink)
+ (command "gcc"),
+ (sink),
+
+ // -o is what is used by default, out_file_option here is included for
+ // instructive purposes.
+ (out_file_option "-o")
]>;
-def LanguageMap : LanguageMap<[LangToSuffixes<"c", ["c"]>]>;
+def LanguageMap : LanguageMap<[(lang_to_suffixes "c", "c")]>;
-def CompilationGraph : CompilationGraph<[Edge<"root", "gcc">]>;
+def CompilationGraph : CompilationGraph<[(edge "root", "gcc")]>;
diff --git a/tools/llvmc/example/Skeleton/plugins/Plugin/Plugin.td b/tools/llvmc/examples/Skeleton/AutoGenerated.td
index febb9ad374eb..97483ce92822 100644
--- a/tools/llvmc/example/Skeleton/plugins/Plugin/Plugin.td
+++ b/tools/llvmc/examples/Skeleton/AutoGenerated.td
@@ -1,6 +1,6 @@
-//===- Plugin.td - A skeleton plugin for LLVMC -------------*- tablegen -*-===//
+//===- AutoGenerated.td ------------------------------------*- tablegen -*-===//
//
-// Write the code for your plugin here.
+// Write the TableGen description of your llvmc-based driver here.
//
//===----------------------------------------------------------------------===//
diff --git a/tools/llvmc/examples/Skeleton/Hooks.cpp b/tools/llvmc/examples/Skeleton/Hooks.cpp
new file mode 100644
index 000000000000..ddd38f6e707c
--- /dev/null
+++ b/tools/llvmc/examples/Skeleton/Hooks.cpp
@@ -0,0 +1,12 @@
+//===--- Hooks.cpp - The LLVM Compiler Driver -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hook definitions should go here.
+//
+//===----------------------------------------------------------------------===//
diff --git a/tools/llvmc/driver/Main.cpp b/tools/llvmc/examples/Skeleton/Main.cpp
index b1f5b6798ae6..24c7768f93b7 100644
--- a/tools/llvmc/driver/Main.cpp
+++ b/tools/llvmc/examples/Skeleton/Main.cpp
@@ -7,8 +7,9 @@
//
//===----------------------------------------------------------------------===//
//
-// Just include CompilerDriver/Main.inc.
+// Just include CompilerDriver/Main.inc and AutoGenerated.inc.
//
//===----------------------------------------------------------------------===//
#include "llvm/CompilerDriver/Main.inc"
+#include "AutoGenerated.inc"
diff --git a/tools/llvmc/examples/Skeleton/Makefile b/tools/llvmc/examples/Skeleton/Makefile
new file mode 100644
index 000000000000..41ca8235e24d
--- /dev/null
+++ b/tools/llvmc/examples/Skeleton/Makefile
@@ -0,0 +1,20 @@
+##===- llvmc/examples/Skeleton/Makefile --------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open
+# Source License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+# Change this so that $(LEVEL)/Makefile.common refers to
+# $LLVM_OBJ_DIR/Makefile.common or $YOUR_LLVM_BASED_PROJECT/Makefile.common.
+export LEVEL = ../../../..
+
+# Change this to the name of your LLVMC-based driver.
+LLVMC_BASED_DRIVER = llvmc-skeleton
+
+# Change this to the name of .inc file built from your .td file.
+BUILT_SOURCES = AutoGenerated.inc
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/Skeleton/README b/tools/llvmc/examples/Skeleton/README
index 92216ae50451..61ff6fbf235d 100644
--- a/tools/llvmc/example/Skeleton/README
+++ b/tools/llvmc/examples/Skeleton/README
@@ -1,6 +1,6 @@
This is a template that can be used to create your own LLVMC-based drivers. Just
copy the `Skeleton` directory to the location of your preference and edit
-`Skeleton/Makefile` and `Skeleton/plugins/Plugin`.
+`Skeleton/Makefile` and `Skeleton/AutoGenerated.inc`.
The build system assumes that your project is based on LLVM.
diff --git a/tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp b/tools/llvmc/examples/mcc16/Hooks.cpp
index 9b2f9fc5448d..edb91e16aa90 100644
--- a/tools/llvmc/example/mcc16/plugins/PIC16Base/PluginMain.cpp
+++ b/tools/llvmc/examples/mcc16/Hooks.cpp
@@ -1,19 +1,23 @@
-#include "AutoGenerated.inc"
-
#include "llvm/System/Path.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
+#include <string>
namespace llvmc {
extern char *ProgramName;
+
+ namespace autogenerated {
+ extern llvm::cl::opt<std::string> Parameter_p;
+ }
}
-
+using namespace llvm;
+using namespace llvmc;
// Returns the platform specific directory separator via #ifdefs.
-// FIXME: This currently work on linux and windows only. It does not
-// work on other unices.
+// FIXME: This currently work on linux and windows only. It does not
+// work on other unices.
static std::string GetDirSeparator() {
#if __linux__ || __APPLE__
return "/";
@@ -28,14 +32,14 @@ namespace hooks {
std::string
GetLowerCasePartDefine(void) {
std::string Partname;
- if (AutoGeneratedParameter_p.empty()) {
+ if (autogenerated::Parameter_p.empty()) {
Partname = "16f1xxx";
} else {
- Partname = AutoGeneratedParameter_p;
+ Partname = autogenerated::Parameter_p;
}
std::string LowerCase;
- for (unsigned i = 0; i <= Partname.size(); i++) {
+ for (unsigned i = 0; i < Partname.size(); i++) {
LowerCase.push_back(std::tolower(Partname[i]));
}
@@ -45,26 +49,25 @@ GetLowerCasePartDefine(void) {
std::string
GetUpperCasePartDefine(void) {
std::string Partname;
- if (AutoGeneratedParameter_p.empty()) {
+ if (autogenerated::Parameter_p.empty()) {
Partname = "16f1xxx";
} else {
- Partname = AutoGeneratedParameter_p;
+ Partname = autogenerated::Parameter_p;
}
std::string UpperCase;
- for (unsigned i = 0; i <= Partname.size(); i++) {
+ for (unsigned i = 0; i < Partname.size(); i++) {
UpperCase.push_back(std::toupper(Partname[i]));
}
return "__" + UpperCase;
}
-
// Get the dir where c16 executables reside.
std::string GetBinDir() {
- // Construct a Path object from the program name.
+ // Construct a Path object from the program name.
void *P = (void*) (intptr_t) GetBinDir;
- sys::Path ProgramFullPath
+ sys::Path ProgramFullPath
= sys::Path::GetMainExecutable(llvmc::ProgramName, P);
// Get the dir name for the program. It's last component should be 'bin'.
@@ -80,7 +83,7 @@ std::string GetInstallDir() {
// Go one more level up to get the install dir.
std::string InstallDir = BinDirPath.getDirname();
-
+
return InstallDir + GetDirSeparator();
}
diff --git a/tools/llvmc/example/mcc16/driver/Main.cpp b/tools/llvmc/examples/mcc16/Main.cpp
index e66e2f9c67c6..55ae9128394a 100644
--- a/tools/llvmc/example/mcc16/driver/Main.cpp
+++ b/tools/llvmc/examples/mcc16/Main.cpp
@@ -13,21 +13,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
-#include "llvm/CompilerDriver/ForceLinkage.h"
+#include "llvm/CompilerDriver/Main.h"
+
#include "llvm/System/Path.h"
+#include "llvm/Config/config.h"
+
#include <iostream>
-namespace llvmc {
- int Main(int argc, char** argv);
-}
+#include "PIC16.inc"
+
+namespace {
// Modify the PACKAGE_VERSION to use build number in top level configure file.
void PIC16VersionPrinter(void) {
std::cout << "MPLAB C16 1.0 " << PACKAGE_VERSION << "\n";
}
+}
+
int main(int argc, char** argv) {
// HACK
@@ -36,7 +40,7 @@ int main(int argc, char** argv) {
Languages.setHiddenFlag(llvm::cl::Hidden);
DryRun.setHiddenFlag(llvm::cl::Hidden);
- llvm::cl::SetVersionPrinter(PIC16VersionPrinter);
+ llvm::cl::SetVersionPrinter(PIC16VersionPrinter);
// Ask for a standard temp dir, but just cache its basename., and delete it.
llvm::sys::Path tempDir;
@@ -49,6 +53,5 @@ int main(int argc, char** argv) {
tempDir = TempDirname;
tempDir.eraseFromDisk(true);
- llvmc::ForceLinkage();
return llvmc::Main(argc, argv);
}
diff --git a/tools/llvmc/plugins/Base/Makefile b/tools/llvmc/examples/mcc16/Makefile
index ebc433505422..4409cffb5538 100644
--- a/tools/llvmc/plugins/Base/Makefile
+++ b/tools/llvmc/examples/mcc16/Makefile
@@ -1,4 +1,4 @@
-##===- tools/llvmc/plugins/Base/Makefile -------------------*- Makefile -*-===##
+##===- llvmc/examples/mcc16/Makefile -----------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -9,7 +9,7 @@
LEVEL = ../../../..
-LLVMC_PLUGIN = Base
-BUILT_SOURCES = AutoGenerated.inc
+LLVMC_BASED_DRIVER = mcc16
+BUILT_SOURCES = PIC16.inc
include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td b/tools/llvmc/examples/mcc16/PIC16.td
index 25149ad72f49..6f0419675e0c 100644
--- a/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td
+++ b/tools/llvmc/examples/mcc16/PIC16.td
@@ -1,4 +1,4 @@
-//===- PIC16Base.td - PIC16 toolchain driver ---------------*- tablegen -*-===//
+//===- PIC16.td - PIC16 toolchain driver -------------------*- tablegen -*-===//
//
// A basic driver for the PIC16 toolchain.
//
@@ -202,33 +202,33 @@ def mplink : Tool<[
// Language map
def LanguageMap : LanguageMap<[
- LangToSuffixes<"c", ["c"]>,
- LangToSuffixes<"c-cpp-output", ["i"]>,
- LangToSuffixes<"assembler", ["s"]>,
- LangToSuffixes<"assembler-with-cpp", ["S"]>,
- LangToSuffixes<"llvm-assembler", ["ll"]>,
- LangToSuffixes<"llvm-bitcode", ["bc"]>,
- LangToSuffixes<"object-code", ["o"]>,
- LangToSuffixes<"executable", ["cof"]>
+ (lang_to_suffixes "c", "c"),
+ (lang_to_suffixes "c-cpp-output", "i"),
+ (lang_to_suffixes "assembler", "s"),
+ (lang_to_suffixes "assembler-with-cpp", "S"),
+ (lang_to_suffixes "llvm-assembler", "ll"),
+ (lang_to_suffixes "llvm-bitcode", "bc"),
+ (lang_to_suffixes "object-code", "o"),
+ (lang_to_suffixes "executable", "cof")
]>;
// Compilation graph
def CompilationGraph : CompilationGraph<[
- Edge<"root", "clang_cc">,
- Edge<"root", "llvm_ld">,
- OptionalEdge<"root", "llvm_ld_optimizer", (case
- (switch_on "S"), (inc_weight),
- (switch_on "c"), (inc_weight))>,
- Edge<"root", "gpasm">,
- Edge<"root", "mplink">,
- Edge<"clang_cc", "llvm_ld">,
- OptionalEdge<"clang_cc", "llvm_ld_optimizer", (case
- (switch_on "S"), (inc_weight),
- (switch_on "c"), (inc_weight))>,
- Edge<"llvm_ld", "pic16passes">,
- Edge<"llvm_ld_optimizer", "pic16passes">,
- Edge<"pic16passes", "llc">,
- Edge<"llc", "gpasm">,
- Edge<"gpasm", "mplink">
+ (edge "root", "clang_cc"),
+ (edge "root", "llvm_ld"),
+ (optional_edge "root", "llvm_ld_optimizer",
+ (case (switch_on "S"), (inc_weight),
+ (switch_on "c"), (inc_weight))),
+ (edge "root", "gpasm"),
+ (edge "root", "mplink"),
+ (edge "clang_cc", "llvm_ld"),
+ (optional_edge "clang_cc", "llvm_ld_optimizer",
+ (case (switch_on "S"), (inc_weight),
+ (switch_on "c"), (inc_weight))),
+ (edge "llvm_ld", "pic16passes"),
+ (edge "llvm_ld_optimizer", "pic16passes"),
+ (edge "pic16passes", "llc"),
+ (edge "llc", "gpasm"),
+ (edge "gpasm", "mplink")
]>;
diff --git a/tools/llvmc/example/mcc16/README b/tools/llvmc/examples/mcc16/README
index eeef6a4f8f1b..6d2b73d5d979 100644
--- a/tools/llvmc/example/mcc16/README
+++ b/tools/llvmc/examples/mcc16/README
@@ -1,5 +1,5 @@
This is a basic compiler driver for the PIC16 toolchain that shows how to create
-your own llvmc-based drivers. It is based on the example/Skeleton template.
+your own llvmc-based drivers. It is based on the examples/Skeleton template.
The PIC16 toolchain looks like this:
diff --git a/tools/llvmc/plugins/Base/PluginMain.cpp b/tools/llvmc/plugins/Base/PluginMain.cpp
deleted file mode 100644
index add8acb4a574..000000000000
--- a/tools/llvmc/plugins/Base/PluginMain.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "AutoGenerated.inc"
diff --git a/tools/llvmc/plugins/Clang/PluginMain.cpp b/tools/llvmc/plugins/Clang/PluginMain.cpp
deleted file mode 100644
index add8acb4a574..000000000000
--- a/tools/llvmc/plugins/Clang/PluginMain.cpp
+++ /dev/null
@@ -1 +0,0 @@
-#include "AutoGenerated.inc"
diff --git a/tools/llvmc/plugins/Makefile b/tools/llvmc/plugins/Makefile
deleted file mode 100644
index 37dac6f0ac05..000000000000
--- a/tools/llvmc/plugins/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-##===- tools/llvmc/plugins/Makefile ------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open
-# Source License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-
-ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-DIRS = $(LLVMC_BUILTIN_PLUGINS)
-endif
-
-export LLVMC_BUILTIN_PLUGIN=1
-
-include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/src/AutoGenerated.td b/tools/llvmc/src/AutoGenerated.td
new file mode 100644
index 000000000000..8507b1ff225a
--- /dev/null
+++ b/tools/llvmc/src/AutoGenerated.td
@@ -0,0 +1,17 @@
+//===- AutoGenerated.td - LLVMC toolchain descriptions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains compilation graph description used by llvmc.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/CompilerDriver/Common.td"
+
+include "Base.td"
+include "Clang.td"
diff --git a/tools/llvmc/plugins/Base/Base.td.in b/tools/llvmc/src/Base.td.in
index a042997e1086..0c4de4c3fa50 100644
--- a/tools/llvmc/plugins/Base/Base.td.in
+++ b/tools/llvmc/src/Base.td.in
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-include "llvm/CompilerDriver/Common.td"
// Options
@@ -263,7 +262,7 @@ def llc : Tool<
// Base class for linkers
class llvm_gcc_based_linker <string cmd_prefix, dag on_empty> : Tool<
-[(in_language ["object-code", "static-library"]),
+[(in_language ["object-code", "static-library", "dynamic-library"]),
(out_language "executable"),
(output_suffix "out"),
(command cmd_prefix),
@@ -305,73 +304,79 @@ def llvm_gcc_cpp_linker : llvm_gcc_based_linker<"@LLVMGXXCOMMAND@",
// Language map
-def LanguageMap : LanguageMap<
- [LangToSuffixes<"c++", ["cc", "cp", "cxx", "cpp", "CPP", "c++", "C"]>,
- LangToSuffixes<"c++-header", ["hpp"]>,
- LangToSuffixes<"c", ["c"]>,
- LangToSuffixes<"c-header", ["h"]>,
- LangToSuffixes<"c-cpp-output", ["i"]>,
- LangToSuffixes<"objective-c-cpp-output", ["mi"]>,
- LangToSuffixes<"objective-c++", ["mm"]>,
- LangToSuffixes<"objective-c++-header", ["hmm"]>,
- LangToSuffixes<"objective-c", ["m"]>,
- LangToSuffixes<"objective-c-header", ["hm"]>,
- LangToSuffixes<"assembler", ["s"]>,
- LangToSuffixes<"assembler-with-cpp", ["S"]>,
- LangToSuffixes<"llvm-assembler", ["ll"]>,
- LangToSuffixes<"llvm-bitcode", ["bc"]>,
- LangToSuffixes<"object-code", ["o", "*empty*"]>,
- LangToSuffixes<"static-library", ["a", "lib"]>,
- LangToSuffixes<"executable", ["out"]>
- ]>;
+def LanguageMap : LanguageMap<[
+ (lang_to_suffixes "c++", ["cc", "cp", "cxx", "cpp", "CPP", "c++", "C"]),
+ (lang_to_suffixes "c++-header", "hpp"),
+ (lang_to_suffixes "c", "c"),
+ (lang_to_suffixes "c-header", "h"),
+ (lang_to_suffixes "c-cpp-output", "i"),
+ (lang_to_suffixes "objective-c-cpp-output", "mi"),
+ (lang_to_suffixes "objective-c++", "mm"),
+ (lang_to_suffixes "objective-c++-header", "hmm"),
+ (lang_to_suffixes "objective-c", "m"),
+ (lang_to_suffixes "objective-c-header", "hm"),
+ (lang_to_suffixes "assembler", "s"),
+ (lang_to_suffixes "assembler-with-cpp", "S"),
+ (lang_to_suffixes "llvm-assembler", "ll"),
+ (lang_to_suffixes "llvm-bitcode", "bc"),
+ (lang_to_suffixes "object-code", ["o", "*empty*"]),
+ (lang_to_suffixes "static-library", ["a", "lib"]),
+ (lang_to_suffixes "dynamic-library", ["so", "dylib", "dll"]),
+ (lang_to_suffixes "executable", ["out"])
+]>;
// Compilation graph
def CompilationGraph : CompilationGraph<[
- Edge<"root", "llvm_gcc_c">,
- Edge<"root", "llvm_gcc_assembler">,
- Edge<"root", "llvm_gcc_cpp">,
- Edge<"root", "llvm_gcc_m">,
- Edge<"root", "llvm_gcc_mxx">,
- Edge<"root", "llc">,
+ (edge "root", "llvm_gcc_c"),
+ (edge "root", "llvm_gcc_assembler"),
+ (edge "root", "llvm_gcc_cpp"),
+ (edge "root", "llvm_gcc_m"),
+ (edge "root", "llvm_gcc_mxx"),
+ (edge "root", "llc"),
- Edge<"root", "llvm_gcc_c_pch">,
- Edge<"root", "llvm_gcc_cpp_pch">,
- Edge<"root", "llvm_gcc_m_pch">,
- Edge<"root", "llvm_gcc_mxx_pch">,
+ (edge "root", "llvm_gcc_c_pch"),
+ (edge "root", "llvm_gcc_cpp_pch"),
+ (edge "root", "llvm_gcc_m_pch"),
+ (edge "root", "llvm_gcc_mxx_pch"),
- Edge<"llvm_gcc_c", "llc">,
- Edge<"llvm_gcc_cpp", "llc">,
- Edge<"llvm_gcc_m", "llc">,
- Edge<"llvm_gcc_mxx", "llc">,
- Edge<"llvm_as", "llc">,
+ (edge "llvm_gcc_c", "llc"),
+ (edge "llvm_gcc_cpp", "llc"),
+ (edge "llvm_gcc_m", "llc"),
+ (edge "llvm_gcc_mxx", "llc"),
+ (edge "llvm_as", "llc"),
- OptionalEdge<"root", "llvm_as",
- (case (switch_on "emit-llvm"), (inc_weight))>,
- OptionalEdge<"llvm_gcc_c", "opt", (case (switch_on "opt"), (inc_weight))>,
- OptionalEdge<"llvm_gcc_cpp", "opt", (case (switch_on "opt"), (inc_weight))>,
- OptionalEdge<"llvm_gcc_m", "opt", (case (switch_on "opt"), (inc_weight))>,
- OptionalEdge<"llvm_gcc_mxx", "opt", (case (switch_on "opt"), (inc_weight))>,
- OptionalEdge<"llvm_as", "opt", (case (switch_on "opt"), (inc_weight))>,
- Edge<"opt", "llc">,
+ (optional_edge "root", "llvm_as",
+ (case (switch_on "emit-llvm"), (inc_weight))),
+ (optional_edge "llvm_gcc_c", "opt",
+ (case (switch_on "opt"), (inc_weight))),
+ (optional_edge "llvm_gcc_cpp", "opt",
+ (case (switch_on "opt"), (inc_weight))),
+ (optional_edge "llvm_gcc_m", "opt",
+ (case (switch_on "opt"), (inc_weight))),
+ (optional_edge "llvm_gcc_mxx", "opt",
+ (case (switch_on "opt"), (inc_weight))),
+ (optional_edge "llvm_as", "opt",
+ (case (switch_on "opt"), (inc_weight))),
+ (edge "opt", "llc"),
- Edge<"llc", "llvm_gcc_assembler">,
- Edge<"llvm_gcc_assembler", "llvm_gcc_linker">,
- OptionalEdge<"llvm_gcc_assembler", "llvm_gcc_cpp_linker",
+ (edge "llc", "llvm_gcc_assembler"),
+ (edge "llvm_gcc_assembler", "llvm_gcc_linker"),
+ (optional_edge "llvm_gcc_assembler", "llvm_gcc_cpp_linker",
(case
(or (input_languages_contain "c++"),
(input_languages_contain "objective-c++")),
(inc_weight),
(or (parameter_equals "linker", "g++"),
- (parameter_equals "linker", "c++")), (inc_weight))>,
+ (parameter_equals "linker", "c++")), (inc_weight))),
- Edge<"root", "llvm_gcc_linker">,
- OptionalEdge<"root", "llvm_gcc_cpp_linker",
+ (edge "root", "llvm_gcc_linker"),
+ (optional_edge "root", "llvm_gcc_cpp_linker",
(case
(or (input_languages_contain "c++"),
(input_languages_contain "objective-c++")),
(inc_weight),
(or (parameter_equals "linker", "g++"),
- (parameter_equals "linker", "c++")), (inc_weight))>
- ]>;
+ (parameter_equals "linker", "c++")), (inc_weight)))
+]>;
diff --git a/tools/llvmc/plugins/Clang/Clang.td b/tools/llvmc/src/Clang.td
index 988d9b1c8ab1..1d75743f4fee 100644
--- a/tools/llvmc/plugins/Clang/Clang.td
+++ b/tools/llvmc/src/Clang.td
@@ -1,22 +1,18 @@
-include "llvm/CompilerDriver/Common.td"
+//===- Clang.td - LLVMC toolchain descriptions -------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains compilation graph description used by llvmc.
+//
+//===----------------------------------------------------------------------===//
-def Priority : PluginPriority<1>;
def Options : OptionList<[
-// Extern options
-(switch_option "E", (extern)),
-(switch_option "S", (extern)),
-(switch_option "c", (extern)),
-(switch_option "fsyntax-only", (extern)),
-(switch_option "emit-llvm", (extern)),
-(switch_option "pthread", (extern)),
-(parameter_list_option "I", (extern)),
-(parameter_list_option "include", (extern)),
-(parameter_list_option "L", (extern)),
-(parameter_list_option "l", (extern)),
-(prefix_list_option "Wa,", (extern)),
-(prefix_list_option "Wl,", (extern)),
-
(switch_option "clang", (help "Use Clang instead of llvm-gcc"))
]>;
@@ -71,31 +67,21 @@ def llvm_ld : Tool<
(join)
]>;
-// Language map
-
-def LanguageMap : LanguageMap<[
- LangToSuffixes<"c++", ["cc", "cp", "cxx", "cpp", "CPP", "c++", "C"]>,
- LangToSuffixes<"c", ["c"]>,
- LangToSuffixes<"objective-c", ["m"]>,
- LangToSuffixes<"c-cpp-output", ["i"]>,
- LangToSuffixes<"objective-c-cpp-output", ["mi"]>
-]>;
-
// Compilation graph
-def CompilationGraph : CompilationGraph<[
- OptionalEdge<"root", "clang_c",
- (case (switch_on "clang"), (inc_weight))>,
- OptionalEdge<"root", "clang_cpp",
- (case (switch_on "clang"), (inc_weight))>,
- OptionalEdge<"root", "clang_objective_c",
- (case (switch_on "clang"), (inc_weight))>,
- OptionalEdge<"root", "clang_objective_cpp",
- (case (switch_on "clang"), (inc_weight))>,
- Edge<"clang_c", "llc">,
- Edge<"clang_cpp", "llc">,
- Edge<"clang_objective_c", "llc">,
- Edge<"clang_objective_cpp", "llc">,
- OptionalEdge<"llc", "as", (case (switch_on "clang"), (inc_weight))>,
- Edge<"as", "llvm_ld">
+def ClangCompilationGraph : CompilationGraph<[
+ (optional_edge "root", "clang_c",
+ (case (switch_on "clang"), (inc_weight))),
+ (optional_edge "root", "clang_cpp",
+ (case (switch_on "clang"), (inc_weight))),
+ (optional_edge "root", "clang_objective_c",
+ (case (switch_on "clang"), (inc_weight))),
+ (optional_edge "root", "clang_objective_cpp",
+ (case (switch_on "clang"), (inc_weight))),
+ (edge "clang_c", "llc"),
+ (edge "clang_cpp", "llc"),
+ (edge "clang_objective_c", "llc"),
+ (edge "clang_objective_cpp", "llc"),
+ (optional_edge "llc", "as", (case (switch_on "clang"), (inc_weight))),
+ (edge "as", "llvm_ld")
]>;
diff --git a/tools/llvmc/plugins/Base/Hooks.cpp b/tools/llvmc/src/Hooks.cpp
index 661a914489db..661a914489db 100644
--- a/tools/llvmc/plugins/Base/Hooks.cpp
+++ b/tools/llvmc/src/Hooks.cpp
diff --git a/tools/llvmc/example/Skeleton/driver/Main.cpp b/tools/llvmc/src/Main.cpp
index b1f5b6798ae6..9f9c71aa8c34 100644
--- a/tools/llvmc/example/Skeleton/driver/Main.cpp
+++ b/tools/llvmc/src/Main.cpp
@@ -7,8 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// Just include CompilerDriver/Main.inc.
+// Just include AutoGenerated.inc and CompilerDriver/Main.inc.
//
//===----------------------------------------------------------------------===//
+#include "AutoGenerated.inc"
+
#include "llvm/CompilerDriver/Main.inc"
diff --git a/tools/llvmc/driver/Makefile b/tools/llvmc/src/Makefile
index 2f3104b40896..f3f30911a400 100644
--- a/tools/llvmc/driver/Makefile
+++ b/tools/llvmc/src/Makefile
@@ -1,4 +1,4 @@
-##===- tools/llvmc/driver/Makefile -------------------------*- Makefile -*-===##
+##===- tools/llvmc/src/Makefile ----------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -8,6 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LLVMC_BASED_DRIVER = $(LLVMC_BASED_DRIVER_NAME)
+LLVMC_BASED_DRIVER = llvmc
+BUILT_SOURCES = AutoGenerated.inc
include $(LEVEL)/Makefile.common
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 911fddfe1888..671348c8333b 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -119,6 +119,11 @@ bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model,
return true;
}
+void LTOCodeGenerator::setCpu(const char* mCpu)
+{
+ _mCpu = mCpu;
+}
+
void LTOCodeGenerator::setAssemblerPath(const char* path)
{
if ( _assemblerPath )
@@ -126,6 +131,14 @@ void LTOCodeGenerator::setAssemblerPath(const char* path)
_assemblerPath = new sys::Path(path);
}
+void LTOCodeGenerator::setAssemblerArgs(const char** args, int nargs)
+{
+ for (int i = 0; i < nargs; ++i) {
+ const char *arg = args[i];
+ _assemblerArgs.push_back(arg);
+ }
+}
+
void LTOCodeGenerator::addMustPreserveSymbol(const char* sym)
{
_mustPreserveSymbols[sym] = 1;
@@ -142,8 +155,8 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
// create output file
std::string ErrInfo;
- raw_fd_ostream Out(path, ErrInfo,
- raw_fd_ostream::F_Binary);
+ tool_output_file Out(path, ErrInfo,
+ raw_fd_ostream::F_Binary);
if (!ErrInfo.empty()) {
errMsg = "could not open bitcode file for writing: ";
errMsg += path;
@@ -151,16 +164,17 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
}
// write bitcode to it
- WriteBitcodeToFile(_linker.getModule(), Out);
- Out.close();
+ WriteBitcodeToFile(_linker.getModule(), Out.os());
+ Out.os().close();
- if (Out.has_error()) {
+ if (Out.os().has_error()) {
errMsg = "could not write bitcode file: ";
errMsg += path;
- Out.clear_error();
+ Out.os().clear_error();
return true;
}
+ Out.keep();
return false;
}
@@ -176,11 +190,16 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
// generate assembly code
bool genResult = false;
{
- raw_fd_ostream asmFD(uniqueAsmPath.c_str(), errMsg);
- formatted_raw_ostream asmFile(asmFD);
+ tool_output_file asmFile(uniqueAsmPath.c_str(), errMsg);
if (!errMsg.empty())
return NULL;
- genResult = this->generateAssemblyCode(asmFile, errMsg);
+ genResult = this->generateAssemblyCode(asmFile.os(), errMsg);
+ asmFile.os().close();
+ if (asmFile.os().has_error()) {
+ asmFile.os().clear_error();
+ return NULL;
+ }
+ asmFile.keep();
}
if ( genResult ) {
uniqueAsmPath.eraseFromDisk();
@@ -257,6 +276,11 @@ bool LTOCodeGenerator::assemble(const std::string& asmPath,
args.push_back("-c");
args.push_back("-x");
args.push_back("assembler");
+ } else {
+ for (std::vector<std::string>::iterator I = _assemblerArgs.begin(),
+ E = _assemblerArgs.end(); I != E; ++I) {
+ args.push_back(I->c_str());
+ }
}
args.push_back("-o");
args.push_back(objPath.c_str());
@@ -301,7 +325,7 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
// construct LTModule, hand over ownership of module and target
SubtargetFeatures Features;
- Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple));
+ Features.getDefaultSubtargetFeatures(_mCpu, llvm::Triple(Triple));
std::string FeatureStr = Features.getString();
_target = march->createTargetMachine(Triple, FeatureStr);
}
@@ -343,7 +367,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
}
/// Optimize merged modules using various IPO passes
-bool LTOCodeGenerator::generateAssemblyCode(formatted_raw_ostream& out,
+bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out,
std::string& errMsg)
{
if ( this->determineTarget(errMsg) )
@@ -378,7 +402,9 @@ bool LTOCodeGenerator::generateAssemblyCode(formatted_raw_ostream& out,
codeGenPasses->add(new TargetData(*_target->getTargetData()));
- if (_target->addPassesToEmitFile(*codeGenPasses, out,
+ formatted_raw_ostream Out(out);
+
+ if (_target->addPassesToEmitFile(*codeGenPasses, Out,
TargetMachine::CGFT_AssemblyFile,
CodeGenOpt::Aggressive)) {
errMsg = "target file type not supported";
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index cac3b8c8bf67..f5b78a608a99 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -36,14 +36,16 @@ struct LTOCodeGenerator {
bool addModule(struct LTOModule*, std::string& errMsg);
bool setDebugInfo(lto_debug_model, std::string& errMsg);
bool setCodePICModel(lto_codegen_model, std::string& errMsg);
+ void setCpu(const char *cpu);
void setAssemblerPath(const char* path);
+ void setAssemblerArgs(const char** args, int nargs);
void addMustPreserveSymbol(const char* sym);
bool writeMergedModules(const char* path,
std::string& errMsg);
const void* compile(size_t* length, std::string& errMsg);
void setCodeGenDebugOptions(const char *opts);
private:
- bool generateAssemblyCode(llvm::formatted_raw_ostream& out,
+ bool generateAssemblyCode(llvm::raw_ostream& out,
std::string& errMsg);
bool assemble(const std::string& asmPath,
const std::string& objPath, std::string& errMsg);
@@ -62,6 +64,8 @@ private:
llvm::MemoryBuffer* _nativeObjectFile;
std::vector<const char*> _codegenOptions;
llvm::sys::Path* _assemblerPath;
+ std::string _mCpu;
+ std::vector<std::string> _assemblerArgs;
};
#endif // LTO_CODE_GENERATOR_H
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 0870205a778b..c7cd585d03dd 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -4,10 +4,10 @@
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This file implements the Link Time Optimization library. This library is
+// This file implements the Link Time Optimization library. This library is
// intended to be used by linker to optimize code at link time.
//
//===----------------------------------------------------------------------===//
@@ -36,490 +36,473 @@
using namespace llvm;
-bool LTOModule::isBitcodeFile(const void* mem, size_t length)
-{
- return llvm::sys::IdentifyFileType((char*)mem, length)
- == llvm::sys::Bitcode_FileType;
+bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
+ return llvm::sys::IdentifyFileType((char*)mem, length)
+ == llvm::sys::Bitcode_FileType;
}
-bool LTOModule::isBitcodeFile(const char* path)
-{
- return llvm::sys::Path(path).isBitcodeFile();
+bool LTOModule::isBitcodeFile(const char *path) {
+ return llvm::sys::Path(path).isBitcodeFile();
}
-bool LTOModule::isBitcodeFileForTarget(const void* mem, size_t length,
- const char* triplePrefix)
-{
- MemoryBuffer* buffer = makeBuffer(mem, length);
- if (!buffer)
- return false;
- return isTargetMatch(buffer, triplePrefix);
+bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length,
+ const char *triplePrefix) {
+ MemoryBuffer *buffer = makeBuffer(mem, length);
+ if (!buffer)
+ return false;
+ return isTargetMatch(buffer, triplePrefix);
}
-bool LTOModule::isBitcodeFileForTarget(const char* path,
- const char* triplePrefix)
-{
- MemoryBuffer *buffer = MemoryBuffer::getFile(path);
- if (buffer == NULL)
- return false;
- return isTargetMatch(buffer, triplePrefix);
+bool LTOModule::isBitcodeFileForTarget(const char *path,
+ const char *triplePrefix) {
+ MemoryBuffer *buffer = MemoryBuffer::getFile(path);
+ if (buffer == NULL)
+ return false;
+ return isTargetMatch(buffer, triplePrefix);
}
-// takes ownership of buffer
-bool LTOModule::isTargetMatch(MemoryBuffer* buffer, const char* triplePrefix)
-{
- OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext()));
- // on success, m owns buffer and both are deleted at end of this method
- if (!m) {
- delete buffer;
- return false;
- }
- std::string actualTarget = m->getTargetTriple();
- return (strncmp(actualTarget.c_str(), triplePrefix,
- strlen(triplePrefix)) == 0);
+// Takes ownership of buffer.
+bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) {
+ OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext()));
+ // On success, m owns buffer and both are deleted at end of this method.
+ if (!m) {
+ delete buffer;
+ return false;
+ }
+ std::string actualTarget = m->getTargetTriple();
+ return (strncmp(actualTarget.c_str(), triplePrefix,
+ strlen(triplePrefix)) == 0);
}
-LTOModule::LTOModule(Module* m, TargetMachine* t)
- : _module(m), _target(t), _symbolsParsed(false)
+LTOModule::LTOModule(Module *m, TargetMachine *t)
+ : _module(m), _target(t), _symbolsParsed(false)
{
}
-LTOModule* LTOModule::makeLTOModule(const char* path,
- std::string& errMsg)
-{
- OwningPtr<MemoryBuffer> buffer(MemoryBuffer::getFile(path, &errMsg));
- if (!buffer)
- return NULL;
- return makeLTOModule(buffer.get(), errMsg);
+LTOModule *LTOModule::makeLTOModule(const char *path,
+ std::string &errMsg) {
+ OwningPtr<MemoryBuffer> buffer(MemoryBuffer::getFile(path, &errMsg));
+ if (!buffer)
+ return NULL;
+ return makeLTOModule(buffer.get(), errMsg);
}
-/// makeBuffer - create a MemoryBuffer from a memory range.
-/// MemoryBuffer requires the byte past end of the buffer to be a zero.
-/// We might get lucky and already be that way, otherwise make a copy.
-/// Also if next byte is on a different page, don't assume it is readable.
-MemoryBuffer* LTOModule::makeBuffer(const void* mem, size_t length)
-{
- const char *startPtr = (char*)mem;
- const char *endPtr = startPtr+length;
- if (((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0 ||
- *endPtr != 0)
- return MemoryBuffer::getMemBufferCopy(StringRef(startPtr, length));
-
- return MemoryBuffer::getMemBuffer(StringRef(startPtr, length));
+/// makeBuffer - Create a MemoryBuffer from a memory range. MemoryBuffer
+/// requires the byte past end of the buffer to be a zero. We might get lucky
+/// and already be that way, otherwise make a copy. Also if next byte is on a
+/// different page, don't assume it is readable.
+MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) {
+ const char *startPtr = (char*)mem;
+ const char *endPtr = startPtr+length;
+ if (((uintptr_t)endPtr & (sys::Process::GetPageSize()-1)) == 0 ||
+ *endPtr != 0)
+ return MemoryBuffer::getMemBufferCopy(StringRef(startPtr, length));
+
+ return MemoryBuffer::getMemBuffer(StringRef(startPtr, length));
}
-LTOModule* LTOModule::makeLTOModule(const void* mem, size_t length,
- std::string& errMsg)
-{
- OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length));
- if (!buffer)
- return NULL;
- return makeLTOModule(buffer.get(), errMsg);
+LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length,
+ std::string &errMsg) {
+ OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length));
+ if (!buffer)
+ return NULL;
+ return makeLTOModule(buffer.get(), errMsg);
}
-LTOModule* LTOModule::makeLTOModule(MemoryBuffer* buffer,
- std::string& errMsg)
-{
- InitializeAllTargets();
-
- // parse bitcode buffer
- OwningPtr<Module> m(ParseBitcodeFile(buffer, getGlobalContext(), &errMsg));
- if (!m)
- return NULL;
-
- std::string Triple = m->getTargetTriple();
- if (Triple.empty())
- Triple = sys::getHostTriple();
-
- // find machine architecture for this module
- const Target* march = TargetRegistry::lookupTarget(Triple, errMsg);
- if (!march)
- return NULL;
-
- // construct LTModule, hand over ownership of module and target
- SubtargetFeatures Features;
- Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple));
- std::string FeatureStr = Features.getString();
- TargetMachine* target = march->createTargetMachine(Triple, FeatureStr);
- return new LTOModule(m.take(), target);
+LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
+ std::string &errMsg) {
+ InitializeAllTargets();
+
+ // parse bitcode buffer
+ OwningPtr<Module> m(ParseBitcodeFile(buffer, getGlobalContext(), &errMsg));
+ if (!m)
+ return NULL;
+
+ std::string Triple = m->getTargetTriple();
+ if (Triple.empty())
+ Triple = sys::getHostTriple();
+
+ // find machine architecture for this module
+ const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
+ if (!march)
+ return NULL;
+
+ // construct LTModule, hand over ownership of module and target
+ SubtargetFeatures Features;
+ Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple));
+ std::string FeatureStr = Features.getString();
+ TargetMachine *target = march->createTargetMachine(Triple, FeatureStr);
+ return new LTOModule(m.take(), target);
}
-const char* LTOModule::getTargetTriple()
-{
- return _module->getTargetTriple().c_str();
+const char *LTOModule::getTargetTriple() {
+ return _module->getTargetTriple().c_str();
}
-void LTOModule::addDefinedFunctionSymbol(Function* f, Mangler &mangler)
-{
- // add to list of defined symbols
- addDefinedSymbol(f, mangler, true);
-
- // add external symbols referenced by this function.
- for (Function::iterator b = f->begin(); b != f->end(); ++b) {
- for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
- for (unsigned count = 0, total = i->getNumOperands();
- count != total; ++count) {
- findExternalRefs(i->getOperand(count), mangler);
- }
- }
- }
+void LTOModule::setTargetTriple(const char *triple) {
+ _module->setTargetTriple(triple);
}
-// get string that data pointer points to
-bool LTOModule::objcClassNameFromExpression(Constant* c, std::string& name)
-{
- if (ConstantExpr* ce = dyn_cast<ConstantExpr>(c)) {
- Constant* op = ce->getOperand(0);
- if (GlobalVariable* gvn = dyn_cast<GlobalVariable>(op)) {
- Constant* cn = gvn->getInitializer();
- if (ConstantArray* ca = dyn_cast<ConstantArray>(cn)) {
- if (ca->isCString()) {
- name = ".objc_class_name_" + ca->getAsString();
- return true;
- }
- }
- }
+void LTOModule::addDefinedFunctionSymbol(Function *f, Mangler &mangler) {
+ // add to list of defined symbols
+ addDefinedSymbol(f, mangler, true);
+
+ // add external symbols referenced by this function.
+ for (Function::iterator b = f->begin(); b != f->end(); ++b) {
+ for (BasicBlock::iterator i = b->begin(); i != b->end(); ++i) {
+ for (unsigned count = 0, total = i->getNumOperands();
+ count != total; ++count) {
+ findExternalRefs(i->getOperand(count), mangler);
+ }
}
- return false;
+ }
}
-// parse i386/ppc ObjC class data structure
-void LTOModule::addObjCClass(GlobalVariable* clgv)
-{
- if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
- // second slot in __OBJC,__class is pointer to superclass name
- std::string superclassName;
- if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
- NameAndAttributes info;
- if (_undefines.find(superclassName.c_str()) == _undefines.end()) {
- const char* symbolName = ::strdup(superclassName.c_str());
- info.name = ::strdup(symbolName);
- info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
- // string is owned by _undefines
- _undefines[info.name] = info;
- }
+// Get string that data pointer points to.
+bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) {
+ if (ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
+ Constant *op = ce->getOperand(0);
+ if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
+ Constant *cn = gvn->getInitializer();
+ if (ConstantArray *ca = dyn_cast<ConstantArray>(cn)) {
+ if (ca->isCString()) {
+ name = ".objc_class_name_" + ca->getAsString();
+ return true;
}
- // third slot in __OBJC,__class is pointer to class name
- std::string className;
- if (objcClassNameFromExpression(c->getOperand(2), className)) {
- const char* symbolName = ::strdup(className.c_str());
- NameAndAttributes info;
- info.name = symbolName;
- info.attributes = (lto_symbol_attributes)
- (LTO_SYMBOL_PERMISSIONS_DATA |
- LTO_SYMBOL_DEFINITION_REGULAR |
- LTO_SYMBOL_SCOPE_DEFAULT);
- _symbols.push_back(info);
- _defines[info.name] = 1;
- }
+ }
}
+ }
+ return false;
}
-
-// parse i386/ppc ObjC category data structure
-void LTOModule::addObjCCategory(GlobalVariable* clgv)
-{
- if (ConstantStruct* c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
- // second slot in __OBJC,__category is pointer to target class name
- std::string targetclassName;
- if (objcClassNameFromExpression(c->getOperand(1), targetclassName)) {
- NameAndAttributes info;
- if (_undefines.find(targetclassName.c_str()) == _undefines.end()) {
- const char* symbolName = ::strdup(targetclassName.c_str());
- info.name = ::strdup(symbolName);
- info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
- // string is owned by _undefines
- _undefines[info.name] = info;
- }
- }
+// Parse i386/ppc ObjC class data structure.
+void LTOModule::addObjCClass(GlobalVariable *clgv) {
+ if (ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
+ // second slot in __OBJC,__class is pointer to superclass name
+ std::string superclassName;
+ if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
+ NameAndAttributes info;
+ if (_undefines.find(superclassName.c_str()) == _undefines.end()) {
+ const char *symbolName = ::strdup(superclassName.c_str());
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ // string is owned by _undefines
+ _undefines[info.name] = info;
+ }
+ }
+ // third slot in __OBJC,__class is pointer to class name
+ std::string className;
+ if (objcClassNameFromExpression(c->getOperand(2), className)) {
+ const char *symbolName = ::strdup(className.c_str());
+ NameAndAttributes info;
+ info.name = symbolName;
+ info.attributes = (lto_symbol_attributes)
+ (LTO_SYMBOL_PERMISSIONS_DATA |
+ LTO_SYMBOL_DEFINITION_REGULAR |
+ LTO_SYMBOL_SCOPE_DEFAULT);
+ _symbols.push_back(info);
+ _defines[info.name] = 1;
}
+ }
}
-// parse i386/ppc ObjC class list data structure
-void LTOModule::addObjCClassRef(GlobalVariable* clgv)
-{
+// Parse i386/ppc ObjC category data structure.
+void LTOModule::addObjCCategory(GlobalVariable *clgv) {
+ if (ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
+ // second slot in __OBJC,__category is pointer to target class name
std::string targetclassName;
- if (objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) {
- NameAndAttributes info;
- if (_undefines.find(targetclassName.c_str()) == _undefines.end()) {
- const char* symbolName = ::strdup(targetclassName.c_str());
- info.name = ::strdup(symbolName);
- info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
- // string is owned by _undefines
- _undefines[info.name] = info;
- }
+ if (objcClassNameFromExpression(c->getOperand(1), targetclassName)) {
+ NameAndAttributes info;
+ if (_undefines.find(targetclassName.c_str()) == _undefines.end()) {
+ const char *symbolName = ::strdup(targetclassName.c_str());
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ // string is owned by _undefines
+ _undefines[info.name] = info;
+ }
}
+ }
}
-void LTOModule::addDefinedDataSymbol(GlobalValue* v, Mangler& mangler)
-{
- // add to list of defined symbols
- addDefinedSymbol(v, mangler, false);
-
- // Special case i386/ppc ObjC data structures in magic sections:
- // The issue is that the old ObjC object format did some strange
- // contortions to avoid real linker symbols. For instance, the
- // ObjC class data structure is allocated statically in the executable
- // that defines that class. That data structures contains a pointer to
- // its superclass. But instead of just initializing that part of the
- // struct to the address of its superclass, and letting the static and
- // dynamic linkers do the rest, the runtime works by having that field
- // instead point to a C-string that is the name of the superclass.
- // At runtime the objc initialization updates that pointer and sets
- // it to point to the actual super class. As far as the linker
- // knows it is just a pointer to a string. But then someone wanted the
- // linker to issue errors at build time if the superclass was not found.
- // So they figured out a way in mach-o object format to use an absolute
- // symbols (.objc_class_name_Foo = 0) and a floating reference
- // (.reference .objc_class_name_Bar) to cause the linker into erroring when
- // a class was missing.
- // The following synthesizes the implicit .objc_* symbols for the linker
- // from the ObjC data structures generated by the front end.
- if (v->hasSection() /* && isTargetDarwin */) {
- // special case if this data blob is an ObjC class definition
- if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) {
- if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
- addObjCClass(gv);
- }
- }
-
- // special case if this data blob is an ObjC category definition
- else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) {
- if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
- addObjCCategory(gv);
- }
- }
-
- // special case if this data blob is the list of referenced classes
- else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) {
- if (GlobalVariable* gv = dyn_cast<GlobalVariable>(v)) {
- addObjCClassRef(gv);
- }
- }
- }
-
- // add external symbols referenced by this data.
- for (unsigned count = 0, total = v->getNumOperands();
- count != total; ++count) {
- findExternalRefs(v->getOperand(count), mangler);
+// Parse i386/ppc ObjC class list data structure.
+void LTOModule::addObjCClassRef(GlobalVariable *clgv) {
+ std::string targetclassName;
+ if (objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) {
+ NameAndAttributes info;
+ if (_undefines.find(targetclassName.c_str()) == _undefines.end()) {
+ const char *symbolName = ::strdup(targetclassName.c_str());
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ // string is owned by _undefines
+ _undefines[info.name] = info;
}
+ }
}
-void LTOModule::addDefinedSymbol(GlobalValue* def, Mangler &mangler,
- bool isFunction)
-{
- // ignore all llvm.* symbols
- if (def->getName().startswith("llvm."))
- return;
-
- // string is owned by _defines
- const char* symbolName = ::strdup(mangler.getNameWithPrefix(def).c_str());
-
- // set alignment part log2() can have rounding errors
- uint32_t align = def->getAlignment();
- uint32_t attr = align ? CountTrailingZeros_32(def->getAlignment()) : 0;
-
- // set permissions part
- if (isFunction)
- attr |= LTO_SYMBOL_PERMISSIONS_CODE;
- else {
- GlobalVariable* gv = dyn_cast<GlobalVariable>(def);
- if (gv && gv->isConstant())
- attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
- else
- attr |= LTO_SYMBOL_PERMISSIONS_DATA;
- }
-
- // set definition part
- if (def->hasWeakLinkage() || def->hasLinkOnceLinkage()) {
- attr |= LTO_SYMBOL_DEFINITION_WEAK;
- }
- else if (def->hasCommonLinkage()) {
- attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
+void LTOModule::addDefinedDataSymbol(GlobalValue *v, Mangler &mangler) {
+ // Add to list of defined symbols.
+ addDefinedSymbol(v, mangler, false);
+
+ // Special case i386/ppc ObjC data structures in magic sections:
+ // The issue is that the old ObjC object format did some strange
+ // contortions to avoid real linker symbols. For instance, the
+ // ObjC class data structure is allocated statically in the executable
+ // that defines that class. That data structures contains a pointer to
+ // its superclass. But instead of just initializing that part of the
+ // struct to the address of its superclass, and letting the static and
+ // dynamic linkers do the rest, the runtime works by having that field
+ // instead point to a C-string that is the name of the superclass.
+ // At runtime the objc initialization updates that pointer and sets
+ // it to point to the actual super class. As far as the linker
+ // knows it is just a pointer to a string. But then someone wanted the
+ // linker to issue errors at build time if the superclass was not found.
+ // So they figured out a way in mach-o object format to use an absolute
+ // symbols (.objc_class_name_Foo = 0) and a floating reference
+ // (.reference .objc_class_name_Bar) to cause the linker into erroring when
+ // a class was missing.
+ // The following synthesizes the implicit .objc_* symbols for the linker
+ // from the ObjC data structures generated by the front end.
+ if (v->hasSection() /* && isTargetDarwin */) {
+ // special case if this data blob is an ObjC class definition
+ if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) {
+ if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ addObjCClass(gv);
+ }
}
- else {
- attr |= LTO_SYMBOL_DEFINITION_REGULAR;
+
+ // special case if this data blob is an ObjC category definition
+ else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) {
+ if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ addObjCCategory(gv);
+ }
}
-
- // set scope part
- if (def->hasHiddenVisibility())
- attr |= LTO_SYMBOL_SCOPE_HIDDEN;
- else if (def->hasProtectedVisibility())
- attr |= LTO_SYMBOL_SCOPE_PROTECTED;
- else if (def->hasExternalLinkage() || def->hasWeakLinkage()
- || def->hasLinkOnceLinkage() || def->hasCommonLinkage())
- attr |= LTO_SYMBOL_SCOPE_DEFAULT;
- else
- attr |= LTO_SYMBOL_SCOPE_INTERNAL;
- // add to table of symbols
- NameAndAttributes info;
- info.name = symbolName;
- info.attributes = (lto_symbol_attributes)attr;
- _symbols.push_back(info);
- _defines[info.name] = 1;
-}
+ // special case if this data blob is the list of referenced classes
+ else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) {
+ if (GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) {
+ addObjCClassRef(gv);
+ }
+ }
+ }
-void LTOModule::addAsmGlobalSymbol(const char *name) {
- // only add new define if not already defined
- if (_defines.count(name) == 0)
- return;
-
- // string is owned by _defines
- const char *symbolName = ::strdup(name);
- uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR;
- attr |= LTO_SYMBOL_SCOPE_DEFAULT;
- NameAndAttributes info;
- info.name = symbolName;
- info.attributes = (lto_symbol_attributes)attr;
- _symbols.push_back(info);
- _defines[info.name] = 1;
+ // add external symbols referenced by this data.
+ for (unsigned count = 0, total = v->getNumOperands();
+ count != total; ++count) {
+ findExternalRefs(v->getOperand(count), mangler);
+ }
}
-void LTOModule::addPotentialUndefinedSymbol(GlobalValue* decl, Mangler &mangler)
-{
- // ignore all llvm.* symbols
- if (decl->getName().startswith("llvm."))
- return;
- // ignore all aliases
- if (isa<GlobalAlias>(decl))
- return;
+void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
+ bool isFunction) {
+ // ignore all llvm.* symbols
+ if (def->getName().startswith("llvm."))
+ return;
- std::string name = mangler.getNameWithPrefix(decl);
+ // string is owned by _defines
+ const char *symbolName = ::strdup(mangler.getNameWithPrefix(def).c_str());
- // we already have the symbol
- if (_undefines.find(name) != _undefines.end())
- return;
+ // set alignment part log2() can have rounding errors
+ uint32_t align = def->getAlignment();
+ uint32_t attr = align ? CountTrailingZeros_32(def->getAlignment()) : 0;
- NameAndAttributes info;
- // string is owned by _undefines
- info.name = ::strdup(name.c_str());
- if (decl->hasExternalWeakLinkage())
- info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
+ // set permissions part
+ if (isFunction)
+ attr |= LTO_SYMBOL_PERMISSIONS_CODE;
+ else {
+ GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
+ if (gv && gv->isConstant())
+ attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
else
- info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
- _undefines[name] = info;
+ attr |= LTO_SYMBOL_PERMISSIONS_DATA;
+ }
+
+ // set definition part
+ if (def->hasWeakLinkage() || def->hasLinkOnceLinkage()) {
+ attr |= LTO_SYMBOL_DEFINITION_WEAK;
+ }
+ else if (def->hasCommonLinkage()) {
+ attr |= LTO_SYMBOL_DEFINITION_TENTATIVE;
+ }
+ else {
+ attr |= LTO_SYMBOL_DEFINITION_REGULAR;
+ }
+
+ // set scope part
+ if (def->hasHiddenVisibility())
+ attr |= LTO_SYMBOL_SCOPE_HIDDEN;
+ else if (def->hasProtectedVisibility())
+ attr |= LTO_SYMBOL_SCOPE_PROTECTED;
+ else if (def->hasExternalLinkage() || def->hasWeakLinkage()
+ || def->hasLinkOnceLinkage() || def->hasCommonLinkage())
+ attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+ else
+ attr |= LTO_SYMBOL_SCOPE_INTERNAL;
+
+ // add to table of symbols
+ NameAndAttributes info;
+ info.name = symbolName;
+ info.attributes = (lto_symbol_attributes)attr;
+ _symbols.push_back(info);
+ _defines[info.name] = 1;
}
+void LTOModule::addAsmGlobalSymbol(const char *name) {
+ // only add new define if not already defined
+ if (_defines.count(name))
+ return;
+
+ // string is owned by _defines
+ const char *symbolName = ::strdup(name);
+ uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR;
+ attr |= LTO_SYMBOL_SCOPE_DEFAULT;
+ NameAndAttributes info;
+ info.name = symbolName;
+ info.attributes = (lto_symbol_attributes)attr;
+ _symbols.push_back(info);
+ _defines[info.name] = 1;
+}
+void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
+ Mangler &mangler) {
+ // ignore all llvm.* symbols
+ if (decl->getName().startswith("llvm."))
+ return;
+
+ // ignore all aliases
+ if (isa<GlobalAlias>(decl))
+ return;
+
+ std::string name = mangler.getNameWithPrefix(decl);
+
+ // we already have the symbol
+ if (_undefines.find(name) != _undefines.end())
+ return;
+
+ NameAndAttributes info;
+ // string is owned by _undefines
+ info.name = ::strdup(name.c_str());
+ if (decl->hasExternalWeakLinkage())
+ info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
+ else
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ _undefines[name] = info;
+}
-// Find external symbols referenced by VALUE. This is a recursive function.
-void LTOModule::findExternalRefs(Value* value, Mangler &mangler) {
-
- if (GlobalValue* gv = dyn_cast<GlobalValue>(value)) {
- if (!gv->hasExternalLinkage())
- addPotentialUndefinedSymbol(gv, mangler);
- // If this is a variable definition, do not recursively process
- // initializer. It might contain a reference to this variable
- // and cause an infinite loop. The initializer will be
- // processed in addDefinedDataSymbol().
- return;
- }
- // GlobalValue, even with InternalLinkage type, may have operands with
- // ExternalLinkage type. Do not ignore these operands.
- if (Constant* c = dyn_cast<Constant>(value)) {
- // Handle ConstantExpr, ConstantStruct, ConstantArry etc.
- for (unsigned i = 0, e = c->getNumOperands(); i != e; ++i)
- findExternalRefs(c->getOperand(i), mangler);
- }
+
+// Find external symbols referenced by VALUE. This is a recursive function.
+void LTOModule::findExternalRefs(Value *value, Mangler &mangler) {
+ if (GlobalValue *gv = dyn_cast<GlobalValue>(value)) {
+ if (!gv->hasExternalLinkage())
+ addPotentialUndefinedSymbol(gv, mangler);
+ // If this is a variable definition, do not recursively process
+ // initializer. It might contain a reference to this variable
+ // and cause an infinite loop. The initializer will be
+ // processed in addDefinedDataSymbol().
+ return;
+ }
+
+ // GlobalValue, even with InternalLinkage type, may have operands with
+ // ExternalLinkage type. Do not ignore these operands.
+ if (Constant *c = dyn_cast<Constant>(value)) {
+ // Handle ConstantExpr, ConstantStruct, ConstantArry etc.
+ for (unsigned i = 0, e = c->getNumOperands(); i != e; ++i)
+ findExternalRefs(c->getOperand(i), mangler);
+ }
}
-void LTOModule::lazyParseSymbols()
-{
- if (!_symbolsParsed) {
- _symbolsParsed = true;
-
- // Use mangler to add GlobalPrefix to names to match linker names.
- MCContext Context(*_target->getMCAsmInfo());
- Mangler mangler(Context, *_target->getTargetData());
-
- // add functions
- for (Module::iterator f = _module->begin(); f != _module->end(); ++f) {
- if (f->isDeclaration())
- addPotentialUndefinedSymbol(f, mangler);
- else
- addDefinedFunctionSymbol(f, mangler);
- }
-
- // add data
- for (Module::global_iterator v = _module->global_begin(),
- e = _module->global_end(); v != e; ++v) {
- if (v->isDeclaration())
- addPotentialUndefinedSymbol(v, mangler);
- else
- addDefinedDataSymbol(v, mangler);
- }
+void LTOModule::lazyParseSymbols() {
+ if (_symbolsParsed)
+ return;
- // add asm globals
- const std::string &inlineAsm = _module->getModuleInlineAsm();
- const std::string glbl = ".globl";
- std::string asmSymbolName;
- std::string::size_type pos = inlineAsm.find(glbl, 0);
- while (pos != std::string::npos) {
- // eat .globl
- pos = pos + 6;
-
- // skip white space between .globl and symbol name
- std::string::size_type pbegin = inlineAsm.find_first_not_of(' ', pos);
- if (pbegin == std::string::npos)
- break;
-
- // find end-of-line
- std::string::size_type pend = inlineAsm.find_first_of('\n', pbegin);
- if (pend == std::string::npos)
- break;
-
- asmSymbolName.assign(inlineAsm, pbegin, pend - pbegin);
- addAsmGlobalSymbol(asmSymbolName.c_str());
-
- // search next .globl
- pos = inlineAsm.find(glbl, pend);
- }
+ _symbolsParsed = true;
- // make symbols for all undefines
- for (StringMap<NameAndAttributes>::iterator it=_undefines.begin();
- it != _undefines.end(); ++it) {
- // if this symbol also has a definition, then don't make an undefine
- // because it is a tentative definition
- if (_defines.count(it->getKey()) == 0) {
- NameAndAttributes info = it->getValue();
- _symbols.push_back(info);
- }
- }
- }
+ // Use mangler to add GlobalPrefix to names to match linker names.
+ MCContext Context(*_target->getMCAsmInfo());
+ Mangler mangler(Context, *_target->getTargetData());
+
+ // add functions
+ for (Module::iterator f = _module->begin(); f != _module->end(); ++f) {
+ if (f->isDeclaration())
+ addPotentialUndefinedSymbol(f, mangler);
+ else
+ addDefinedFunctionSymbol(f, mangler);
+ }
+
+ // add data
+ for (Module::global_iterator v = _module->global_begin(),
+ e = _module->global_end(); v != e; ++v) {
+ if (v->isDeclaration())
+ addPotentialUndefinedSymbol(v, mangler);
+ else
+ addDefinedDataSymbol(v, mangler);
+ }
+
+ // add asm globals
+ const std::string &inlineAsm = _module->getModuleInlineAsm();
+ const std::string glbl = ".globl";
+ std::string asmSymbolName;
+ std::string::size_type pos = inlineAsm.find(glbl, 0);
+ while (pos != std::string::npos) {
+ // eat .globl
+ pos = pos + 6;
+
+ // skip white space between .globl and symbol name
+ std::string::size_type pbegin = inlineAsm.find_first_not_of(' ', pos);
+ if (pbegin == std::string::npos)
+ break;
+
+ // find end-of-line
+ std::string::size_type pend = inlineAsm.find_first_of('\n', pbegin);
+ if (pend == std::string::npos)
+ break;
+
+ asmSymbolName.assign(inlineAsm, pbegin, pend - pbegin);
+ addAsmGlobalSymbol(asmSymbolName.c_str());
+
+ // search next .globl
+ pos = inlineAsm.find(glbl, pend);
+ }
+
+ // make symbols for all undefines
+ for (StringMap<NameAndAttributes>::iterator it=_undefines.begin();
+ it != _undefines.end(); ++it) {
+ // if this symbol also has a definition, then don't make an undefine
+ // because it is a tentative definition
+ if (_defines.count(it->getKey()) == 0) {
+ NameAndAttributes info = it->getValue();
+ _symbols.push_back(info);
+ }
+ }
}
-uint32_t LTOModule::getSymbolCount()
-{
- lazyParseSymbols();
- return _symbols.size();
+uint32_t LTOModule::getSymbolCount() {
+ lazyParseSymbols();
+ return _symbols.size();
}
-lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index)
-{
- lazyParseSymbols();
- if (index < _symbols.size())
- return _symbols[index].attributes;
- else
- return lto_symbol_attributes(0);
+lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index) {
+ lazyParseSymbols();
+ if (index < _symbols.size())
+ return _symbols[index].attributes;
+ else
+ return lto_symbol_attributes(0);
}
-const char* LTOModule::getSymbolName(uint32_t index)
-{
- lazyParseSymbols();
- if (index < _symbols.size())
- return _symbols[index].name;
- else
- return NULL;
+const char *LTOModule::getSymbolName(uint32_t index) {
+ lazyParseSymbols();
+ if (index < _symbols.size())
+ return _symbols[index].name;
+ else
+ return NULL;
}
diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h
index 7f475d40aa0a..a19acc0d7378 100644
--- a/tools/lto/LTOModule.h
+++ b/tools/lto/LTOModule.h
@@ -55,6 +55,7 @@ struct LTOModule {
std::string& errMsg);
const char* getTargetTriple();
+ void setTargetTriple(const char*);
uint32_t getSymbolCount();
lto_symbol_attributes getSymbolAttributes(uint32_t index);
const char* getSymbolName(uint32_t index);
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index 8d57333f9c0c..e157a4c48aca 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -25,15 +25,22 @@ LINK_COMPONENTS := $(TARGETS_TO_BUILD) ipo scalaropts linker bitreader bitwriter
include $(LEVEL)/Makefile.common
ifeq ($(HOST_OS),Darwin)
+ # Special hack to allow libLTO to have an offset version number.
+ ifdef LLVM_LTO_VERSION_OFFSET
+ LTO_LIBRARY_VERSION := $(shell expr $(LLVM_SUBMIT_VERSION) + \
+ $(LLVM_LTO_VERSION_OFFSET))
+ else
+ LTO_LIBRARY_VERSION := $(LLVM_SUBMIT_VERSION)
+ endif
+
# set dylib internal version number to llvmCore submission number
ifdef LLVM_SUBMIT_VERSION
LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-current_version \
- -Wl,$(LLVM_SUBMIT_VERSION).$(LLVM_SUBMIT_SUBVERSION) \
+ -Wl,$(LTO_LIBRARY_VERSION).$(LLVM_SUBMIT_SUBVERSION) \
-Wl,-compatibility_version -Wl,1
endif
# extra options to override libtool defaults
LLVMLibsOptions := $(LLVMLibsOptions) \
- -avoid-version \
-Wl,-dead_strip \
-Wl,-seg1addr -Wl,0xE0000000
@@ -41,7 +48,7 @@ ifeq ($(HOST_OS),Darwin)
DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/')
ifneq ($(DARWIN_VERS),8)
LLVMLibsOptions := $(LLVMLibsOptions) \
- -no-undefined -Wl,-install_name \
+ -Wl,-install_name \
-Wl,"@executable_path/../lib/lib$(LIBRARYNAME)$(SHLIBEXT)"
endif
endif
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index cc841bdf034d..3d7ef0a1cb94 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -120,6 +120,14 @@ const char* lto_module_get_target_triple(lto_module_t mod)
return mod->getTargetTriple();
}
+//
+// sets triple string with which the object will be codegened.
+//
+void lto_module_set_target_triple(lto_module_t mod, const char *triple)
+{
+ return mod->setTargetTriple(triple);
+}
+
//
// returns the number of symbols in the object module
@@ -142,7 +150,7 @@ const char* lto_module_get_symbol_name(lto_module_t mod, uint32_t index)
// returns the attributes of the ith symbol in the object module
//
lto_symbol_attributes lto_module_get_symbol_attribute(lto_module_t mod,
- uint32_t index)
+ uint32_t index)
{
return mod->getSymbolAttributes(index);
}
@@ -203,6 +211,14 @@ bool lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model model)
}
//
+// sets the cpu to generate code for
+//
+void lto_codegen_set_cpu(lto_code_gen_t cg, const char* cpu)
+{
+ return cg->setCpu(cpu);
+}
+
+//
// sets the path to the assembler tool
//
void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path)
@@ -210,6 +226,16 @@ void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path)
cg->setAssemblerPath(path);
}
+
+//
+// sets extra arguments that libLTO should pass to the assembler
+//
+void lto_codegen_set_assembler_args(lto_code_gen_t cg, const char** args,
+ int nargs)
+{
+ cg->setAssemblerArgs(args, nargs);
+}
+
//
// adds to a list of all global symbols that must exist in the final
// generated code. If a function is not listed there, it might be
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index 9011cf602b1a..4dbf760d3882 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -6,6 +6,7 @@ lto_module_get_num_symbols
lto_module_get_symbol_attribute
lto_module_get_symbol_name
lto_module_get_target_triple
+lto_module_set_target_triple
lto_module_is_object_file
lto_module_is_object_file_for_target
lto_module_is_object_file_in_memory
@@ -20,4 +21,6 @@ lto_codegen_set_debug_model
lto_codegen_set_pic_model
lto_codegen_write_merged_modules
lto_codegen_debug_options
+lto_codegen_set_assembler_args
lto_codegen_set_assembler_path
+lto_codegen_set_cpu
diff --git a/tools/opt/AnalysisWrappers.cpp b/tools/opt/AnalysisWrappers.cpp
index f548d007d78e..a2b57bb3e115 100644
--- a/tools/opt/AnalysisWrappers.cpp
+++ b/tools/opt/AnalysisWrappers.cpp
@@ -31,7 +31,7 @@ namespace {
/// or handle in alias analyses.
struct ExternalFunctionsPassedConstants : public ModulePass {
static char ID; // Pass ID, replacement for typeid
- ExternalFunctionsPassedConstants() : ModulePass(&ID) {}
+ ExternalFunctionsPassedConstants() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration()) continue;
@@ -42,8 +42,8 @@ namespace {
Instruction *User = dyn_cast<Instruction>(*UI);
if (!User) continue;
- CallSite CS = CallSite::get(User);
- if (!CS.getInstruction()) continue;
+ CallSite CS(cast<Value>(User));
+ if (!CS) continue;
for (CallSite::arg_iterator AI = CS.arg_begin(),
E = CS.arg_end(); AI != E; ++AI) {
@@ -66,15 +66,17 @@ namespace {
AU.setPreservesAll();
}
};
+}
- char ExternalFunctionsPassedConstants::ID = 0;
- RegisterPass<ExternalFunctionsPassedConstants>
+char ExternalFunctionsPassedConstants::ID = 0;
+static RegisterPass<ExternalFunctionsPassedConstants>
P1("print-externalfnconstants",
"Print external fn callsites passed constants");
+namespace {
struct CallGraphPrinter : public ModulePass {
static char ID; // Pass ID, replacement for typeid
- CallGraphPrinter() : ModulePass(&ID) {}
+ CallGraphPrinter() : ModulePass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -85,8 +87,8 @@ namespace {
return false;
}
};
-
- char CallGraphPrinter::ID = 0;
- RegisterPass<CallGraphPrinter>
- P2("print-callgraph", "Print a call graph");
}
+
+char CallGraphPrinter::ID = 0;
+static RegisterPass<CallGraphPrinter>
+ P2("print-callgraph", "Print a call graph");
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index e7c6d1e6af85..9de7d6ac5459 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -28,13 +28,19 @@ static void WriteGraphToFile(raw_ostream &O, const std::string &GraphName,
std::string Filename = GraphName + ".dot";
O << "Writing '" << Filename << "'...";
std::string ErrInfo;
- raw_fd_ostream F(Filename.c_str(), ErrInfo);
-
- if (ErrInfo.empty())
- WriteGraph(F, GT);
- else
- O << " error opening file for writing!";
- O << "\n";
+ tool_output_file F(Filename.c_str(), ErrInfo);
+
+ if (ErrInfo.empty()) {
+ WriteGraph(F.os(), GT);
+ F.os().close();
+ if (!F.os().has_error()) {
+ O << "\n";
+ F.keep();
+ return;
+ }
+ }
+ O << " error opening file for writing!\n";
+ F.os().clear_error();
}
@@ -65,7 +71,7 @@ namespace llvm {
namespace {
struct CallGraphPrinter : public ModulePass {
static char ID; // Pass ID, replacement for typeid
- CallGraphPrinter() : ModulePass(&ID) {}
+ CallGraphPrinter() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
WriteGraphToFile(llvm::errs(), "callgraph", &getAnalysis<CallGraph>());
@@ -79,12 +85,12 @@ namespace {
AU.setPreservesAll();
}
};
-
- char CallGraphPrinter::ID = 0;
- RegisterPass<CallGraphPrinter> P2("dot-callgraph",
- "Print Call Graph to 'dot' file");
}
+char CallGraphPrinter::ID = 0;
+static RegisterPass<CallGraphPrinter> P2("dot-callgraph",
+ "Print Call Graph to 'dot' file");
+
//===----------------------------------------------------------------------===//
// DomInfoPrinter Pass
//===----------------------------------------------------------------------===//
@@ -93,7 +99,7 @@ namespace {
class DomInfoPrinter : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- DomInfoPrinter() : FunctionPass(&ID) {}
+ DomInfoPrinter() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -110,8 +116,8 @@ namespace {
return false;
}
};
-
- char DomInfoPrinter::ID = 0;
- static RegisterPass<DomInfoPrinter>
- DIP("print-dom-info", "Dominator Info Printer", true, true);
}
+
+char DomInfoPrinter::ID = 0;
+static RegisterPass<DomInfoPrinter>
+DIP("print-dom-info", "Dominator Info Printer", true, true);
diff --git a/tools/opt/PrintSCC.cpp b/tools/opt/PrintSCC.cpp
index ea486ca29000..533f49ec2a87 100644
--- a/tools/opt/PrintSCC.cpp
+++ b/tools/opt/PrintSCC.cpp
@@ -36,7 +36,7 @@ using namespace llvm;
namespace {
struct CFGSCC : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSCC() : FunctionPass(&ID) {}
+ CFGSCC() : FunctionPass(ID) {}
bool runOnFunction(Function& func);
void print(raw_ostream &O, const Module* = 0) const { }
@@ -48,7 +48,7 @@ namespace {
struct CallGraphSCC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- CallGraphSCC() : ModulePass(&ID) {}
+ CallGraphSCC() : ModulePass(ID) {}
// run - Print out SCCs in the call graph for the specified module.
bool runOnModule(Module &M);
@@ -61,30 +61,30 @@ namespace {
AU.addRequired<CallGraph>();
}
};
+}
- char CFGSCC::ID = 0;
- RegisterPass<CFGSCC>
- Y("print-cfg-sccs", "Print SCCs of each function CFG");
+char CFGSCC::ID = 0;
+static RegisterPass<CFGSCC>
+Y("print-cfg-sccs", "Print SCCs of each function CFG");
- char CallGraphSCC::ID = 0;
- RegisterPass<CallGraphSCC>
- Z("print-callgraph-sccs", "Print SCCs of the Call Graph");
-}
+char CallGraphSCC::ID = 0;
+static RegisterPass<CallGraphSCC>
+Z("print-callgraph-sccs", "Print SCCs of the Call Graph");
bool CFGSCC::runOnFunction(Function &F) {
unsigned sccNum = 0;
- outs() << "SCCs for Function " << F.getName() << " in PostOrder:";
+ errs() << "SCCs for Function " << F.getName() << " in PostOrder:";
for (scc_iterator<Function*> SCCI = scc_begin(&F),
E = scc_end(&F); SCCI != E; ++SCCI) {
std::vector<BasicBlock*> &nextSCC = *SCCI;
- outs() << "\nSCC #" << ++sccNum << " : ";
+ errs() << "\nSCC #" << ++sccNum << " : ";
for (std::vector<BasicBlock*>::const_iterator I = nextSCC.begin(),
E = nextSCC.end(); I != E; ++I)
- outs() << (*I)->getName() << ", ";
+ errs() << (*I)->getName() << ", ";
if (nextSCC.size() == 1 && SCCI.hasLoop())
- outs() << " (Has self-loop).";
+ errs() << " (Has self-loop).";
}
- outs() << "\n";
+ errs() << "\n";
return true;
}
@@ -94,19 +94,19 @@ bool CFGSCC::runOnFunction(Function &F) {
bool CallGraphSCC::runOnModule(Module &M) {
CallGraphNode* rootNode = getAnalysis<CallGraph>().getRoot();
unsigned sccNum = 0;
- outs() << "SCCs for the program in PostOrder:";
+ errs() << "SCCs for the program in PostOrder:";
for (scc_iterator<CallGraphNode*> SCCI = scc_begin(rootNode),
E = scc_end(rootNode); SCCI != E; ++SCCI) {
const std::vector<CallGraphNode*> &nextSCC = *SCCI;
- outs() << "\nSCC #" << ++sccNum << " : ";
+ errs() << "\nSCC #" << ++sccNum << " : ";
for (std::vector<CallGraphNode*>::const_iterator I = nextSCC.begin(),
E = nextSCC.end(); I != E; ++I)
- outs() << ((*I)->getFunction() ? (*I)->getFunction()->getNameStr()
+ errs() << ((*I)->getFunction() ? (*I)->getFunction()->getNameStr()
: std::string("external node")) << ", ";
if (nextSCC.size() == 1 && SCCI.hasLoop())
- outs() << " (Has self-loop).";
+ errs() << " (Has self-loop).";
}
- outs() << "\n";
+ errs() << "\n";
return true;
}
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 0878737d34bc..d83718517918 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -53,7 +53,7 @@ InputFilename(cl::Positional, cl::desc("<input bitcode file>"),
static cl::opt<std::string>
OutputFilename("o", cl::desc("Override output filename"),
- cl::value_desc("filename"), cl::init("-"));
+ cl::value_desc("filename"));
static cl::opt<bool>
Force("f", cl::desc("Enable binary output on terminals"));
@@ -138,17 +138,19 @@ namespace {
struct CallGraphSCCPassPrinter : public CallGraphSCCPass {
static char ID;
const PassInfo *PassToPrint;
- CallGraphSCCPassPrinter(const PassInfo *PI) :
- CallGraphSCCPass(&ID), PassToPrint(PI) {}
+ raw_ostream &Out;
+ CallGraphSCCPassPrinter(const PassInfo *PI, raw_ostream &out) :
+ CallGraphSCCPass(ID), PassToPrint(PI), Out(out) {}
virtual bool runOnSCC(CallGraphSCC &SCC) {
if (!Quiet) {
- outs() << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
+ Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
if (F)
- getAnalysisID<Pass>(PassToPrint).print(outs(), F->getParent());
+ getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
+ F->getParent());
}
}
// Get and print pass...
@@ -158,7 +160,7 @@ struct CallGraphSCCPassPrinter : public CallGraphSCCPass {
virtual const char *getPassName() const { return "'Pass' Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(PassToPrint);
+ AU.addRequiredID(PassToPrint->getTypeInfo());
AU.setPreservesAll();
}
};
@@ -168,13 +170,14 @@ char CallGraphSCCPassPrinter::ID = 0;
struct ModulePassPrinter : public ModulePass {
static char ID;
const PassInfo *PassToPrint;
- ModulePassPrinter(const PassInfo *PI) : ModulePass(&ID),
- PassToPrint(PI) {}
+ raw_ostream &Out;
+ ModulePassPrinter(const PassInfo *PI, raw_ostream &out)
+ : ModulePass(ID), PassToPrint(PI), Out(out) {}
virtual bool runOnModule(Module &M) {
if (!Quiet) {
- outs() << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
- getAnalysisID<Pass>(PassToPrint).print(outs(), &M);
+ Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
+ getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out, &M);
}
// Get and print pass...
@@ -184,7 +187,7 @@ struct ModulePassPrinter : public ModulePass {
virtual const char *getPassName() const { return "'Pass' Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(PassToPrint);
+ AU.addRequiredID(PassToPrint->getTypeInfo());
AU.setPreservesAll();
}
};
@@ -192,24 +195,26 @@ struct ModulePassPrinter : public ModulePass {
char ModulePassPrinter::ID = 0;
struct FunctionPassPrinter : public FunctionPass {
const PassInfo *PassToPrint;
+ raw_ostream &Out;
static char ID;
- FunctionPassPrinter(const PassInfo *PI) : FunctionPass(&ID),
- PassToPrint(PI) {}
+ FunctionPassPrinter(const PassInfo *PI, raw_ostream &out)
+ : FunctionPass(ID), PassToPrint(PI), Out(out) {}
virtual bool runOnFunction(Function &F) {
if (!Quiet) {
- outs() << "Printing analysis '" << PassToPrint->getPassName()
- << "' for function '" << F.getName() << "':\n";
+ Out << "Printing analysis '" << PassToPrint->getPassName()
+ << "' for function '" << F.getName() << "':\n";
}
// Get and print pass...
- getAnalysisID<Pass>(PassToPrint).print(outs(), F.getParent());
+ getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
+ F.getParent());
return false;
}
virtual const char *getPassName() const { return "FunctionPass Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(PassToPrint);
+ AU.addRequiredID(PassToPrint->getTypeInfo());
AU.setPreservesAll();
}
};
@@ -219,13 +224,14 @@ char FunctionPassPrinter::ID = 0;
struct LoopPassPrinter : public LoopPass {
static char ID;
const PassInfo *PassToPrint;
- LoopPassPrinter(const PassInfo *PI) :
- LoopPass(&ID), PassToPrint(PI) {}
+ raw_ostream &Out;
+ LoopPassPrinter(const PassInfo *PI, raw_ostream &out) :
+ LoopPass(ID), PassToPrint(PI), Out(out) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
if (!Quiet) {
- outs() << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
- getAnalysisID<Pass>(PassToPrint).print(outs(),
+ Out << "Printing analysis '" << PassToPrint->getPassName() << "':\n";
+ getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
L->getHeader()->getParent()->getParent());
}
// Get and print pass...
@@ -235,7 +241,7 @@ struct LoopPassPrinter : public LoopPass {
virtual const char *getPassName() const { return "'Pass' Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(PassToPrint);
+ AU.addRequiredID(PassToPrint->getTypeInfo());
AU.setPreservesAll();
}
};
@@ -244,25 +250,27 @@ char LoopPassPrinter::ID = 0;
struct BasicBlockPassPrinter : public BasicBlockPass {
const PassInfo *PassToPrint;
+ raw_ostream &Out;
static char ID;
- BasicBlockPassPrinter(const PassInfo *PI)
- : BasicBlockPass(&ID), PassToPrint(PI) {}
+ BasicBlockPassPrinter(const PassInfo *PI, raw_ostream &out)
+ : BasicBlockPass(ID), PassToPrint(PI), Out(out) {}
virtual bool runOnBasicBlock(BasicBlock &BB) {
if (!Quiet) {
- outs() << "Printing Analysis info for BasicBlock '" << BB.getName()
- << "': Pass " << PassToPrint->getPassName() << ":\n";
+ Out << "Printing Analysis info for BasicBlock '" << BB.getName()
+ << "': Pass " << PassToPrint->getPassName() << ":\n";
}
// Get and print pass...
- getAnalysisID<Pass>(PassToPrint).print(outs(), BB.getParent()->getParent());
+ getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
+ BB.getParent()->getParent());
return false;
}
virtual const char *getPassName() const { return "BasicBlockPass Printer"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(PassToPrint);
+ AU.addRequiredID(PassToPrint->getTypeInfo());
AU.setPreservesAll();
}
};
@@ -351,6 +359,11 @@ void AddStandardLinkPasses(PassManagerBase &PM) {
int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
llvm::PrettyStackTraceProgram X(argc, argv);
+
+ if (AnalyzeOnly && NoOutput) {
+ errs() << argv[0] << ": analyze mode conflicts with no-output mode.\n";
+ return 1;
+ }
// Enable debug stream buffering.
EnableDebugBuffering = true;
@@ -377,35 +390,22 @@ int main(int argc, char **argv) {
}
// Figure out what stream we are supposed to write to...
- raw_ostream *Out = 0;
- bool DeleteStream = false;
- if (!NoOutput && !AnalyzeOnly) {
- if (OutputFilename == "-") {
- // Print to stdout.
- Out = &outs();
- // If we're printing a bitcode file, switch stdout to binary mode.
- // FIXME: This switches outs() globally, not just for the bitcode output.
- if (!OutputAssembly)
- sys::Program::ChangeStdoutToBinary();
- } else {
- if (NoOutput || AnalyzeOnly) {
- errs() << "WARNING: The -o (output filename) option is ignored when\n"
- "the --disable-output or --analyze options are used.\n";
- } else {
- // Make sure that the Output file gets unlinked from the disk if we get
- // a SIGINT.
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
- std::string ErrorInfo;
- Out = new raw_fd_ostream(OutputFilename.c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!ErrorInfo.empty()) {
- errs() << ErrorInfo << '\n';
- delete Out;
- return 1;
- }
- DeleteStream = true;
- }
+ OwningPtr<tool_output_file> Out;
+ if (NoOutput) {
+ if (!OutputFilename.empty())
+ errs() << "WARNING: The -o (output filename) option is ignored when\n"
+ "the --disable-output option is used.\n";
+ } else {
+ // Default to standard output.
+ if (OutputFilename.empty())
+ OutputFilename = "-";
+
+ std::string ErrorInfo;
+ Out.reset(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ return 1;
}
}
@@ -413,7 +413,7 @@ int main(int argc, char **argv) {
// console, print out a warning message and refuse to do it. We don't
// impress anyone by spewing tons of binary goo to a terminal.
if (!Force && !NoOutput && !AnalyzeOnly && !OutputAssembly)
- if (CheckBitcodeOutputToConsole(*Out, !Quiet))
+ if (CheckBitcodeOutputToConsole(Out->os(), !Quiet))
NoOutput = true;
// Create a PassManager to hold and optimize the collection of passes we are
@@ -489,19 +489,19 @@ int main(int argc, char **argv) {
if (AnalyzeOnly) {
switch (Kind) {
case PT_BasicBlock:
- Passes.add(new BasicBlockPassPrinter(PassInf));
+ Passes.add(new BasicBlockPassPrinter(PassInf, Out->os()));
break;
case PT_Loop:
- Passes.add(new LoopPassPrinter(PassInf));
+ Passes.add(new LoopPassPrinter(PassInf, Out->os()));
break;
case PT_Function:
- Passes.add(new FunctionPassPrinter(PassInf));
+ Passes.add(new FunctionPassPrinter(PassInf, Out->os()));
break;
case PT_CallGraphSCC:
- Passes.add(new CallGraphSCCPassPrinter(PassInf));
+ Passes.add(new CallGraphSCCPassPrinter(PassInf, Out->os()));
break;
default:
- Passes.add(new ModulePassPrinter(PassInf));
+ Passes.add(new ModulePassPrinter(PassInf, Out->os()));
break;
}
}
@@ -538,19 +538,20 @@ int main(int argc, char **argv) {
if (!NoVerify && !VerifyEach)
Passes.add(createVerifierPass());
- // Write bitcode or assembly out to disk or outs() as the last step...
+ // Write bitcode or assembly to the output as the last step...
if (!NoOutput && !AnalyzeOnly) {
if (OutputAssembly)
- Passes.add(createPrintModulePass(Out));
+ Passes.add(createPrintModulePass(&Out->os()));
else
- Passes.add(createBitcodeWriterPass(*Out));
+ Passes.add(createBitcodeWriterPass(Out->os()));
}
// Now that we have all of the passes ready, run them.
Passes.run(*M.get());
- // Delete the raw_fd_ostream.
- if (DeleteStream)
- delete Out;
+ // Declare success.
+ if (!NoOutput)
+ Out->keep();
+
return 0;
}
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 991c7d6caac7..78dc393e5c18 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -13,6 +13,7 @@
#include "gtest/gtest.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
#include <stdarg.h>
#include <list>
@@ -76,7 +77,8 @@ public:
return c0.getValue() == c1.getValue();
}
- friend bool operator!=(const Constructable & c0, const Constructable & c1) {
+ friend bool ATTRIBUTE_UNUSED
+ operator!=(const Constructable & c0, const Constructable & c1) {
return c0.getValue() != c1.getValue();
}
};
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index 887ba5d1f9e6..7e4d0dcd413f 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -53,6 +53,14 @@ TEST(StringRefTest, StringOps) {
EXPECT_EQ( 1, StringRef("aab").compare("aaa"));
EXPECT_EQ(-1, StringRef("aab").compare("aabb"));
EXPECT_EQ( 1, StringRef("aab").compare("aa"));
+ EXPECT_EQ( 1, StringRef("\xFF").compare("\1"));
+
+ EXPECT_EQ(-1, StringRef("AaB").compare_lower("aAd"));
+ EXPECT_EQ( 0, StringRef("AaB").compare_lower("aab"));
+ EXPECT_EQ( 1, StringRef("AaB").compare_lower("AAA"));
+ EXPECT_EQ(-1, StringRef("AaB").compare_lower("aaBb"));
+ EXPECT_EQ( 1, StringRef("AaB").compare_lower("aA"));
+ EXPECT_EQ( 1, StringRef("\xFF").compare_lower("\1"));
EXPECT_EQ(-1, StringRef("aab").compare_numeric("aad"));
EXPECT_EQ( 0, StringRef("aab").compare_numeric("aab"));
@@ -64,6 +72,7 @@ TEST(StringRefTest, StringOps) {
EXPECT_EQ( 0, StringRef("10a").compare_numeric("10a"));
EXPECT_EQ( 1, StringRef("2").compare_numeric("1"));
EXPECT_EQ( 0, StringRef("llvm_v1i64_ty").compare_numeric("llvm_v1i64_ty"));
+ EXPECT_EQ( 1, StringRef("\xFF").compare_numeric("\1"));
}
TEST(StringRefTest, Operators) {
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 1a9e81a0df74..067f5e5116cd 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -92,18 +92,117 @@ TEST(TripleTest, ParsedIDs) {
T = Triple("huh");
EXPECT_EQ(Triple::UnknownArch, T.getArch());
+}
- // Two exceptional cases.
+static std::string Join(StringRef A, StringRef B, StringRef C) {
+ std::string Str = A; Str += '-'; Str += B; Str += '-'; Str += C;
+ return Str;
+}
- T = Triple("i386-mingw32");
- EXPECT_EQ(Triple::x86, T.getArch());
- EXPECT_EQ(Triple::PC, T.getVendor());
- EXPECT_EQ(Triple::MinGW32, T.getOS());
+static std::string Join(StringRef A, StringRef B, StringRef C, StringRef D) {
+ std::string Str = A; Str += '-'; Str += B; Str += '-'; Str += C; Str += '-';
+ Str += D; return Str;
+}
- T = Triple("arm-elf");
- EXPECT_EQ(Triple::arm, T.getArch());
- EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
- EXPECT_EQ(Triple::UnknownOS, T.getOS());
+TEST(TripleTest, Normalization) {
+ EXPECT_EQ("", Triple::normalize(""));
+ EXPECT_EQ("-", Triple::normalize("-"));
+ EXPECT_EQ("--", Triple::normalize("--"));
+ EXPECT_EQ("---", Triple::normalize("---"));
+ EXPECT_EQ("----", Triple::normalize("----"));
+
+ EXPECT_EQ("a", Triple::normalize("a"));
+ EXPECT_EQ("a-b", Triple::normalize("a-b"));
+ EXPECT_EQ("a-b-c", Triple::normalize("a-b-c"));
+ EXPECT_EQ("a-b-c-d", Triple::normalize("a-b-c-d"));
+
+ EXPECT_EQ("i386-b-c", Triple::normalize("i386-b-c"));
+ EXPECT_EQ("i386-a-c", Triple::normalize("a-i386-c"));
+ EXPECT_EQ("i386-a-b", Triple::normalize("a-b-i386"));
+
+ EXPECT_EQ("a-pc-c", Triple::normalize("a-pc-c"));
+ EXPECT_EQ("-pc-b-c", Triple::normalize("pc-b-c"));
+ EXPECT_EQ("a-pc-b", Triple::normalize("a-b-pc"));
+
+ EXPECT_EQ("a-b-linux", Triple::normalize("a-b-linux"));
+ EXPECT_EQ("--linux-b-c", Triple::normalize("linux-b-c"));
+ EXPECT_EQ("a--linux-c", Triple::normalize("a-linux-c"));
+
+ EXPECT_EQ("i386-pc-a", Triple::normalize("a-pc-i386"));
+ EXPECT_EQ("i386-pc-", Triple::normalize("-pc-i386"));
+ EXPECT_EQ("-pc-linux-c", Triple::normalize("linux-pc-c"));
+ EXPECT_EQ("-pc-linux", Triple::normalize("linux-pc-"));
+
+ EXPECT_EQ("i386", Triple::normalize("i386"));
+ EXPECT_EQ("-pc", Triple::normalize("pc"));
+ EXPECT_EQ("--linux", Triple::normalize("linux"));
+
+ // Check that normalizing a permutated set of valid components returns a
+ // triple with the unpermuted components.
+ StringRef C[4];
+ C[3] = "environment";
+ for (int Arch = 1+Triple::UnknownArch; Arch < Triple::InvalidArch; ++Arch) {
+ C[0] = Triple::getArchTypeName(Triple::ArchType(Arch));
+ for (int Vendor = 1+Triple::UnknownVendor; Vendor <= Triple::PC;
+ ++Vendor) {
+ C[1] = Triple::getVendorTypeName(Triple::VendorType(Vendor));
+ for (int OS = 1+Triple::UnknownOS; OS <= Triple::Minix; ++OS) {
+ C[2] = Triple::getOSTypeName(Triple::OSType(OS));
+
+ std::string E = Join(C[0], C[1], C[2]);
+ std::string F = Join(C[0], C[1], C[2], C[3]);
+ EXPECT_EQ(E, Triple::normalize(Join(C[0], C[1], C[2])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[2], C[3])));
+
+ // If a value has multiple interpretations, then the permutation
+ // test will inevitably fail. Currently this is only the case for
+ // "psp" which parses as both an architecture and an O/S.
+ if (OS == Triple::Psp)
+ continue;
+
+ EXPECT_EQ(E, Triple::normalize(Join(C[0], C[2], C[1])));
+ EXPECT_EQ(E, Triple::normalize(Join(C[1], C[2], C[0])));
+ EXPECT_EQ(E, Triple::normalize(Join(C[1], C[0], C[2])));
+ EXPECT_EQ(E, Triple::normalize(Join(C[2], C[0], C[1])));
+ EXPECT_EQ(E, Triple::normalize(Join(C[2], C[1], C[0])));
+
+ EXPECT_EQ(F, Triple::normalize(Join(C[0], C[1], C[3], C[2])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[3], C[1])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[0], C[2], C[1], C[3])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[1], C[2])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[0], C[3], C[2], C[1])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[3], C[0])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[1], C[2], C[0], C[3])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[0], C[2])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[1], C[3], C[2], C[0])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[2], C[3])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[1], C[0], C[3], C[2])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[0], C[1])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[2], C[3], C[1], C[0])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[1], C[3])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[2], C[0], C[3], C[1])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[3], C[0])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[2], C[1], C[0], C[3])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[1], C[2])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[3], C[0], C[2], C[1])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[2], C[0])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[3], C[1], C[0], C[2])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[0], C[1])));
+ EXPECT_EQ(F, Triple::normalize(Join(C[3], C[2], C[1], C[0])));
+ }
+ }
+ }
+
+ EXPECT_EQ("a-b-psp", Triple::normalize("a-b-psp"));
+ EXPECT_EQ("psp-b-c", Triple::normalize("psp-b-c"));
+
+ // Various real-world funky triples. The value returned by GCC's config.sub
+ // is given in the comment.
+ EXPECT_EQ("i386--mingw32", Triple::normalize("i386-mingw32")); // i386-pc-mingw32
+ EXPECT_EQ("x86_64--linux-gnu", Triple::normalize("x86_64-linux-gnu")); // x86_64-pc-linux-gnu
+ EXPECT_EQ("i486--linux-gnu", Triple::normalize("i486-linux-gnu")); // i486-pc-linux-gnu
+ EXPECT_EQ("i386-redhat-linux", Triple::normalize("i386-redhat-linux")); // i386-redhat-linux-gnu
+ EXPECT_EQ("i686--linux", Triple::normalize("i686-linux")); // i686-pc-linux-gnu
}
TEST(TripleTest, MutateName) {
diff --git a/tools/llvmc/example/mcc16/plugins/PIC16Base/Makefile b/unittests/Analysis/Makefile
index 5d785fd4821f..f89240ec7042 100644
--- a/tools/llvmc/example/mcc16/plugins/PIC16Base/Makefile
+++ b/unittests/Analysis/Makefile
@@ -1,4 +1,4 @@
-##===- llvmc/example/Skeleton/plugins/Plugin/Makefile ------*- Makefile -*-===##
+##===- unittests/Analysis/Makefile -------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
@@ -7,11 +7,9 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = $(LLVMC_BASE_LEVEL)/../..
+LEVEL = ../..
+TESTNAME = Analysis
+LINK_COMPONENTS := core support target analysis ipa
-# Change this to the name of your plugin.
-LLVMC_PLUGIN = PIC16Base
-
-BUILT_SOURCES = AutoGenerated.inc
-
-include $(LEVEL)/Makefile.common
+include $(LEVEL)/Makefile.config
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
new file mode 100644
index 000000000000..b7341603cf69
--- /dev/null
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -0,0 +1,82 @@
+//===- ScalarEvolutionsTest.cpp - ScalarEvolution unit tests --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Analysis/ScalarEvolutionExpressions.h>
+#include <llvm/GlobalVariable.h>
+#include <llvm/Constants.h>
+#include <llvm/LLVMContext.h>
+#include <llvm/Module.h>
+#include <llvm/PassManager.h>
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+TEST(ScalarEvolutionsTest, SCEVUnknownRAUW) {
+ LLVMContext Context;
+ Module M("world", Context);
+
+ const FunctionType *FTy = FunctionType::get(Type::getVoidTy(Context),
+ std::vector<const Type *>(), false);
+ Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
+ BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
+ ReturnInst::Create(Context, 0, BB);
+
+ const Type *Ty = Type::getInt1Ty(Context);
+ Constant *Init = Constant::getNullValue(Ty);
+ Value *V0 = new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, Init, "V0");
+ Value *V1 = new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, Init, "V1");
+ Value *V2 = new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage, Init, "V2");
+
+ // Create a ScalarEvolution and "run" it so that it gets initialized.
+ PassManager PM;
+ ScalarEvolution &SE = *new ScalarEvolution();
+ PM.add(&SE);
+ PM.run(M);
+
+ const SCEV *S0 = SE.getSCEV(V0);
+ const SCEV *S1 = SE.getSCEV(V1);
+ const SCEV *S2 = SE.getSCEV(V2);
+
+ const SCEV *P0 = SE.getAddExpr(S0, S0);
+ const SCEV *P1 = SE.getAddExpr(S1, S1);
+ const SCEV *P2 = SE.getAddExpr(S2, S2);
+
+ const SCEVMulExpr *M0 = cast<SCEVMulExpr>(P0);
+ const SCEVMulExpr *M1 = cast<SCEVMulExpr>(P1);
+ const SCEVMulExpr *M2 = cast<SCEVMulExpr>(P2);
+
+ EXPECT_EQ(cast<SCEVConstant>(M0->getOperand(0))->getValue()->getZExtValue(),
+ 2u);
+ EXPECT_EQ(cast<SCEVConstant>(M1->getOperand(0))->getValue()->getZExtValue(),
+ 2u);
+ EXPECT_EQ(cast<SCEVConstant>(M2->getOperand(0))->getValue()->getZExtValue(),
+ 2u);
+
+ // Before the RAUWs, these are all pointing to separate values.
+ EXPECT_EQ(cast<SCEVUnknown>(M0->getOperand(1))->getValue(), V0);
+ EXPECT_EQ(cast<SCEVUnknown>(M1->getOperand(1))->getValue(), V1);
+ EXPECT_EQ(cast<SCEVUnknown>(M2->getOperand(1))->getValue(), V2);
+
+ // Do some RAUWs.
+ V2->replaceAllUsesWith(V1);
+ V1->replaceAllUsesWith(V0);
+
+ // After the RAUWs, these should all be pointing to V0.
+ EXPECT_EQ(cast<SCEVUnknown>(M0->getOperand(1))->getValue(), V0);
+ EXPECT_EQ(cast<SCEVUnknown>(M1->getOperand(1))->getValue(), V0);
+ EXPECT_EQ(cast<SCEVUnknown>(M2->getOperand(1))->getValue(), V0);
+
+ // Manually clean up, since we allocated new SCEV objects after the
+ // pass was finished.
+ SE.releaseMemory();
+}
+
+} // end anonymous namespace
+} // end namespace llvm
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 8f0582d3e8de..ceacbbe62a45 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -65,8 +65,6 @@ public:
stubsAllocated = 0;
}
- void setSizeRequired(bool Required) { SizeRequired = Required; }
-
virtual void setMemoryWritable() { Base->setMemoryWritable(); }
virtual void setMemoryExecutable() { Base->setMemoryExecutable(); }
virtual void setPoisonMemory(bool poison) { Base->setPoisonMemory(poison); }
@@ -630,31 +628,6 @@ TEST_F(JITTest, AvailableExternallyFunctionIsntCompiled) {
<< " not 7 from the IR version.";
}
-TEST_F(JITTest, NeedsExactSizeWithManyGlobals) {
- // PR5291: When the JMM needed the exact size of function bodies before
- // starting to emit them, the JITEmitter would modify a set while iterating
- // over it.
- TheJIT->DisableLazyCompilation(true);
- RJMM->setSizeRequired(true);
-
- LoadAssembly("@A = global i32 42 "
- "@B = global i32* @A "
- "@C = global i32** @B "
- "@D = global i32*** @C "
- "@E = global i32**** @D "
- "@F = global i32***** @E "
- "@G = global i32****** @F "
- "@H = global i32******* @G "
- "@I = global i32******** @H "
- "define i32********* @test() { "
- " ret i32********* @I "
- "}");
- Function *testIR = M->getFunction("test");
- int32_t********* (*test)() = reinterpret_cast<int32_t*********(*)()>(
- (intptr_t)TheJIT->getPointerToFunction(testIR));
- EXPECT_EQ(42, *********test());
-}
-
TEST_F(JITTest, EscapedLazyStubStillCallable) {
TheJIT->DisableLazyCompilation(false);
LoadAssembly("define internal i32 @stubbed() { "
diff --git a/unittests/Makefile b/unittests/Makefile
index 9f377cd744c1..0401cd1c673a 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -9,7 +9,7 @@
LEVEL = ..
-PARALLEL_DIRS = ADT ExecutionEngine Support Transforms VMCore
+PARALLEL_DIRS = ADT ExecutionEngine Support Transforms VMCore Analysis
include $(LEVEL)/Makefile.common
diff --git a/unittests/Makefile.unittest b/unittests/Makefile.unittest
index 2a701a019d8b..9a75b2c2eb8f 100644
--- a/unittests/Makefile.unittest
+++ b/unittests/Makefile.unittest
@@ -37,10 +37,10 @@ TESTLIBS = -lGoogleTest -lUnitTestMain
ifeq ($(ENABLE_SHARED), 1)
# Add the absolute path to the dynamic library. This is ok because
# we'll never install unittests.
- LD.Flags += $(RPATH) -Wl,$(LibDir)
+ LD.Flags += $(RPATH) -Wl,$(SharedLibDir)
# Also set {DYLD,LD}_LIBRARY_PATH because OSX ignores the rpath most
# of the time.
- Run.Shared := $(SHLIBPATH_VAR)="$(LibDir)$${$(SHLIBPATH_VAR):+:}$$$(SHLIBPATH_VAR)"
+ Run.Shared := $(SHLIBPATH_VAR)="$(SharedLibDir)$${$(SHLIBPATH_VAR):+:}$$$(SHLIBPATH_VAR)"
endif
$(LLVMUnitTestExe): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
diff --git a/unittests/Support/Casting.cpp b/unittests/Support/Casting.cpp
new file mode 100644
index 000000000000..ae84693bd636
--- /dev/null
+++ b/unittests/Support/Casting.cpp
@@ -0,0 +1,154 @@
+//===---------- llvm/unittest/Support/Casting.cpp - Casting tests ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "gtest/gtest.h"
+#include <cstdlib>
+
+namespace llvm {
+
+// set up two example classes
+// with conversion facility
+//
+struct bar {
+ bar() {}
+ struct foo *baz();
+ struct foo *caz();
+ struct foo *daz();
+ struct foo *naz();
+private:
+ bar(const bar &);
+};
+struct foo {
+ void ext() const;
+ /* static bool classof(const bar *X) {
+ cerr << "Classof: " << X << "\n";
+ return true;
+ }*/
+};
+
+template <> struct isa_impl<foo, bar> {
+ static inline bool doit(const bar &Val) {
+ dbgs() << "Classof: " << &Val << "\n";
+ return true;
+ }
+};
+
+foo *bar::baz() {
+ return cast<foo>(this);
+}
+
+foo *bar::caz() {
+ return cast_or_null<foo>(this);
+}
+
+foo *bar::daz() {
+ return dyn_cast<foo>(this);
+}
+
+foo *bar::naz() {
+ return dyn_cast_or_null<foo>(this);
+}
+
+
+bar *fub();
+} // End llvm namespace
+
+using namespace llvm;
+
+namespace {
+
+const foo *null_foo = NULL;
+
+extern bar &B1;
+extern const bar *B2;
+// test various configurations of const
+const bar &B3 = B1;
+const bar *const B4 = B2;
+
+TEST(CastingTest, isa) {
+ EXPECT_TRUE(isa<foo>(B1));
+ EXPECT_TRUE(isa<foo>(B2));
+ EXPECT_TRUE(isa<foo>(B3));
+ EXPECT_TRUE(isa<foo>(B4));
+}
+
+TEST(CastingTest, cast) {
+ foo &F1 = cast<foo>(B1);
+ EXPECT_NE(&F1, null_foo);
+ const foo *F3 = cast<foo>(B2);
+ EXPECT_NE(F3, null_foo);
+ const foo *F4 = cast<foo>(B2);
+ EXPECT_NE(F4, null_foo);
+ const foo &F5 = cast<foo>(B3);
+ EXPECT_NE(&F5, null_foo);
+ const foo *F6 = cast<foo>(B4);
+ EXPECT_NE(F6, null_foo);
+ foo *F7 = cast<foo>(fub());
+ EXPECT_EQ(F7, null_foo);
+ foo *F8 = B1.baz();
+ EXPECT_NE(F8, null_foo);
+}
+
+TEST(CastingTest, cast_or_null) {
+ const foo *F11 = cast_or_null<foo>(B2);
+ EXPECT_NE(F11, null_foo);
+ const foo *F12 = cast_or_null<foo>(B2);
+ EXPECT_NE(F12, null_foo);
+ const foo *F13 = cast_or_null<foo>(B4);
+ EXPECT_NE(F13, null_foo);
+ const foo *F14 = cast_or_null<foo>(fub()); // Shouldn't print.
+ EXPECT_EQ(F14, null_foo);
+ foo *F15 = B1.caz();
+ EXPECT_NE(F15, null_foo);
+}
+
+TEST(CastingTest, dyn_cast) {
+ const foo *F1 = dyn_cast<foo>(B2);
+ EXPECT_NE(F1, null_foo);
+ const foo *F2 = dyn_cast<foo>(B2);
+ EXPECT_NE(F2, null_foo);
+ const foo *F3 = dyn_cast<foo>(B4);
+ EXPECT_NE(F3, null_foo);
+ // foo *F4 = dyn_cast<foo>(fub()); // not permittible
+ // EXPECT_EQ(F4, null_foo);
+ foo *F5 = B1.daz();
+ EXPECT_NE(F5, null_foo);
+}
+
+TEST(CastingTest, dyn_cast_or_null) {
+ const foo *F1 = dyn_cast_or_null<foo>(B2);
+ EXPECT_NE(F1, null_foo);
+ const foo *F2 = dyn_cast_or_null<foo>(B2);
+ EXPECT_NE(F2, null_foo);
+ const foo *F3 = dyn_cast_or_null<foo>(B4);
+ EXPECT_NE(F3, null_foo);
+ foo *F4 = dyn_cast_or_null<foo>(fub());
+ EXPECT_EQ(F4, null_foo);
+ foo *F5 = B1.naz();
+ EXPECT_NE(F5, null_foo);
+}
+
+// These lines are errors...
+//foo *F20 = cast<foo>(B2); // Yields const foo*
+//foo &F21 = cast<foo>(B3); // Yields const foo&
+//foo *F22 = cast<foo>(B4); // Yields const foo*
+//foo &F23 = cast_or_null<foo>(B1);
+//const foo &F24 = cast_or_null<foo>(B3);
+
+
+bar B;
+bar &B1 = B;
+const bar *B2 = &B;
+} // anonymous namespace
+
+bar *llvm::fub() { return 0; }
diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp
index 6b8d01d553f4..091ecd4aadeb 100644
--- a/unittests/Support/ConstantRangeTest.cpp
+++ b/unittests/Support/ConstantRangeTest.cpp
@@ -33,6 +33,7 @@ ConstantRange ConstantRangeTest::Wrap(APInt(16, 0xaaa), APInt(16, 0xa));
TEST_F(ConstantRangeTest, Basics) {
EXPECT_TRUE(Full.isFullSet());
EXPECT_FALSE(Full.isEmptySet());
+ EXPECT_TRUE(Full.inverse().isEmptySet());
EXPECT_FALSE(Full.isWrappedSet());
EXPECT_TRUE(Full.contains(APInt(16, 0x0)));
EXPECT_TRUE(Full.contains(APInt(16, 0x9)));
@@ -42,6 +43,7 @@ TEST_F(ConstantRangeTest, Basics) {
EXPECT_FALSE(Empty.isFullSet());
EXPECT_TRUE(Empty.isEmptySet());
+ EXPECT_TRUE(Empty.inverse().isFullSet());
EXPECT_FALSE(Empty.isWrappedSet());
EXPECT_FALSE(Empty.contains(APInt(16, 0x0)));
EXPECT_FALSE(Empty.contains(APInt(16, 0x9)));
@@ -57,6 +59,7 @@ TEST_F(ConstantRangeTest, Basics) {
EXPECT_TRUE(One.contains(APInt(16, 0xa)));
EXPECT_FALSE(One.contains(APInt(16, 0xaa9)));
EXPECT_FALSE(One.contains(APInt(16, 0xaaa)));
+ EXPECT_FALSE(One.inverse().contains(APInt(16, 0xa)));
EXPECT_FALSE(Some.isFullSet());
EXPECT_FALSE(Some.isEmptySet());
@@ -256,11 +259,31 @@ TEST_F(ConstantRangeTest, Add) {
EXPECT_EQ(Empty.add(Wrap), Empty);
EXPECT_EQ(Empty.add(APInt(16, 4)), Empty);
EXPECT_EQ(Some.add(APInt(16, 4)),
- ConstantRange(APInt(16, 0xe), APInt(16, 0xaae)));
+ ConstantRange(APInt(16, 0xe), APInt(16, 0xaae)));
EXPECT_EQ(Wrap.add(APInt(16, 4)),
- ConstantRange(APInt(16, 0xaae), APInt(16, 0xe)));
+ ConstantRange(APInt(16, 0xaae), APInt(16, 0xe)));
EXPECT_EQ(One.add(APInt(16, 4)),
- ConstantRange(APInt(16, 0xe)));
+ ConstantRange(APInt(16, 0xe)));
+}
+
+TEST_F(ConstantRangeTest, Sub) {
+ EXPECT_EQ(Full.sub(APInt(16, 4)), Full);
+ EXPECT_EQ(Full.sub(Full), Full);
+ EXPECT_EQ(Full.sub(Empty), Empty);
+ EXPECT_EQ(Full.sub(One), Full);
+ EXPECT_EQ(Full.sub(Some), Full);
+ EXPECT_EQ(Full.sub(Wrap), Full);
+ EXPECT_EQ(Empty.sub(Empty), Empty);
+ EXPECT_EQ(Empty.sub(One), Empty);
+ EXPECT_EQ(Empty.sub(Some), Empty);
+ EXPECT_EQ(Empty.sub(Wrap), Empty);
+ EXPECT_EQ(Empty.sub(APInt(16, 4)), Empty);
+ EXPECT_EQ(Some.sub(APInt(16, 4)),
+ ConstantRange(APInt(16, 0x6), APInt(16, 0xaa6)));
+ EXPECT_EQ(Wrap.sub(APInt(16, 4)),
+ ConstantRange(APInt(16, 0xaa6), APInt(16, 0x6)));
+ EXPECT_EQ(One.sub(APInt(16, 4)),
+ ConstantRange(APInt(16, 0x6)));
}
TEST_F(ConstantRangeTest, Multiply) {
@@ -348,4 +371,44 @@ TEST_F(ConstantRangeTest, UDiv) {
EXPECT_EQ(Wrap.udiv(Wrap), Full);
}
+TEST_F(ConstantRangeTest, Shl) {
+ EXPECT_EQ(Full.shl(Full), Full);
+ EXPECT_EQ(Full.shl(Empty), Empty);
+ EXPECT_EQ(Full.shl(One), Full); // TODO: [0, (-1 << 0xa) + 1)
+ EXPECT_EQ(Full.shl(Some), Full); // TODO: [0, (-1 << 0xa) + 1)
+ EXPECT_EQ(Full.shl(Wrap), Full);
+ EXPECT_EQ(Empty.shl(Empty), Empty);
+ EXPECT_EQ(Empty.shl(One), Empty);
+ EXPECT_EQ(Empty.shl(Some), Empty);
+ EXPECT_EQ(Empty.shl(Wrap), Empty);
+ EXPECT_EQ(One.shl(One), ConstantRange(APInt(16, 0xa << 0xa),
+ APInt(16, (0xa << 0xa) + 1)));
+ EXPECT_EQ(One.shl(Some), Full); // TODO: [0xa << 0xa, 0)
+ EXPECT_EQ(One.shl(Wrap), Full); // TODO: [0xa, 0xa << 14 + 1)
+ EXPECT_EQ(Some.shl(Some), Full); // TODO: [0xa << 0xa, 0xfc01)
+ EXPECT_EQ(Some.shl(Wrap), Full); // TODO: [0xa, 0x7ff << 0x5 + 1)
+ EXPECT_EQ(Wrap.shl(Wrap), Full);
+}
+
+TEST_F(ConstantRangeTest, Lshr) {
+ EXPECT_EQ(Full.lshr(Full), Full);
+ EXPECT_EQ(Full.lshr(Empty), Empty);
+ EXPECT_EQ(Full.lshr(One), ConstantRange(APInt(16, 0),
+ APInt(16, (0xffff >> 0xa) + 1)));
+ EXPECT_EQ(Full.lshr(Some), ConstantRange(APInt(16, 0),
+ APInt(16, (0xffff >> 0xa) + 1)));
+ EXPECT_EQ(Full.lshr(Wrap), Full);
+ EXPECT_EQ(Empty.lshr(Empty), Empty);
+ EXPECT_EQ(Empty.lshr(One), Empty);
+ EXPECT_EQ(Empty.lshr(Some), Empty);
+ EXPECT_EQ(Empty.lshr(Wrap), Empty);
+ EXPECT_EQ(One.lshr(One), ConstantRange(APInt(16, 0)));
+ EXPECT_EQ(One.lshr(Some), ConstantRange(APInt(16, 0)));
+ EXPECT_EQ(One.lshr(Wrap), ConstantRange(APInt(16, 0), APInt(16, 0xb)));
+ EXPECT_EQ(Some.lshr(Some), ConstantRange(APInt(16, 0),
+ APInt(16, (0xaaa >> 0xa) + 1)));
+ EXPECT_EQ(Some.lshr(Wrap), ConstantRange(APInt(16, 0), APInt(16, 0xaaa)));
+ EXPECT_EQ(Wrap.lshr(Wrap), Full);
+}
+
} // anonymous namespace
diff --git a/unittests/Support/ValueHandleTest.cpp b/unittests/Support/ValueHandleTest.cpp
index 6a6528fbddfb..ba610ea4ff9a 100644
--- a/unittests/Support/ValueHandleTest.cpp
+++ b/unittests/Support/ValueHandleTest.cpp
@@ -35,7 +35,6 @@ protected:
class ConcreteCallbackVH : public CallbackVH {
public:
- ConcreteCallbackVH() : CallbackVH() {}
ConcreteCallbackVH(Value *V) : CallbackVH(V) {}
};
diff --git a/unittests/VMCore/DerivedTypesTest.cpp b/unittests/VMCore/DerivedTypesTest.cpp
index 2e0450d6e5ce..9dea6ff2a904 100644
--- a/unittests/VMCore/DerivedTypesTest.cpp
+++ b/unittests/VMCore/DerivedTypesTest.cpp
@@ -9,13 +9,66 @@
#include "gtest/gtest.h"
#include "../lib/VMCore/LLVMContextImpl.h"
-#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Constants.h"
+#include "llvm/Support/ValueHandle.h"
using namespace llvm;
namespace {
+static void PR7658() {
+ LLVMContext ctx;
+
+ WeakVH NullPtr;
+ PATypeHolder h1;
+ {
+ OpaqueType *o1 = OpaqueType::get(ctx);
+ PointerType *p1 = PointerType::get(o1, 0);
+
+ std::vector<const Type *> t1;
+ t1.push_back(IntegerType::get(ctx, 32));
+ t1.push_back(p1);
+ NullPtr = ConstantPointerNull::get(p1);
+ OpaqueType *o2 = OpaqueType::get (ctx);
+ PointerType *p2 = PointerType::get (o2, 0);
+ t1.push_back(p2);
+
+
+ StructType *s1 = StructType::get(ctx, t1);
+ h1 = s1;
+ o1->refineAbstractTypeTo(s1);
+ o2->refineAbstractTypeTo(h1.get()); // h1 = { i32, \2*, \2* }
+ }
+
+
+ OpaqueType *o3 = OpaqueType::get(ctx);
+ PointerType *p3 = PointerType::get(o3, 0); // p3 = opaque*
+
+ std::vector<const Type *> t2;
+ t2.push_back(IntegerType::get(ctx, 32));
+ t2.push_back(p3);
+
+ std::vector<Constant *> v2;
+ v2.push_back(ConstantInt::get(IntegerType::get(ctx, 32), 14));
+ v2.push_back(ConstantPointerNull::get(p3));
+
+ OpaqueType *o4 = OpaqueType::get(ctx);
+ {
+ PointerType *p4 = PointerType::get(o4, 0);
+ t2.push_back(p4);
+ v2.push_back(ConstantPointerNull::get(p4));
+ }
+
+ WeakVH CS = ConstantStruct::get(ctx, v2, false); // { i32 14, opaque* null, opaque* null}
+
+ StructType *s2 = StructType::get(ctx, t2);
+ PATypeHolder h2(s2);
+ o3->refineAbstractTypeTo(s2);
+ o4->refineAbstractTypeTo(h2.get());
+}
+
+
TEST(OpaqueTypeTest, RegisterWithContext) {
LLVMContext C;
LLVMContextImpl *pImpl = C.pImpl;
@@ -28,6 +81,8 @@ TEST(OpaqueTypeTest, RegisterWithContext) {
EXPECT_EQ(2u, pImpl->OpaqueTypes.size());
}
EXPECT_EQ(1u, pImpl->OpaqueTypes.size());
+
+ PR7658();
}
} // namespace
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
index c1baa74487aa..c9fe2a13166f 100644
--- a/unittests/VMCore/InstructionsTest.cpp
+++ b/unittests/VMCore/InstructionsTest.cpp
@@ -59,9 +59,9 @@ TEST(InstructionsTest, BranchInst) {
EXPECT_EQ(b0->getNumOperands(), 1U);
EXPECT_NE(b0->op_begin(), b0->op_end());
- EXPECT_EQ(next(b0->op_begin()), b0->op_end());
+ EXPECT_EQ(llvm::next(b0->op_begin()), b0->op_end());
- EXPECT_EQ(next(b0->op_begin()), b0->op_end());
+ EXPECT_EQ(llvm::next(b0->op_begin()), b0->op_end());
const IntegerType* Int1 = IntegerType::get(C, 1);
Constant* One = ConstantInt::get(Int1, 1, true);
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index 04db486cd871..942b84823250 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -130,11 +130,12 @@ TEST(NamedMDNodeTest, Search) {
MDNode *n = MDNode::get(Context, &V, 1);
MDNode *n2 = MDNode::get(Context, &V2, 1);
- MDNode *Nodes[2] = { n, n2 };
-
Module M("MyModule", Context);
const char *Name = "llvm.NMD1";
- NamedMDNode *NMD = NamedMDNode::Create(Context, Name, &Nodes[0], 2, &M);
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata(Name);
+ NMD->addOperand(n);
+ NMD->addOperand(n2);
+
std::string Str;
raw_string_ostream oss(Str);
NMD->print(oss);
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index cabfc44602cf..96ee5b458960 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -40,7 +40,7 @@ namespace llvm {
public:
static char run;
static char ID;
- ModuleNDNM() : ModulePass(&ID) {}
+ ModuleNDNM() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
run++;
return false;
@@ -56,7 +56,7 @@ namespace llvm {
public:
static char run;
static char ID;
- ModuleNDM() : ModulePass(&ID) {}
+ ModuleNDM() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
run++;
return true;
@@ -70,7 +70,7 @@ namespace llvm {
public:
static char run;
static char ID;
- ModuleNDM2() : ModulePass(&ID) {}
+ ModuleNDM2() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
run++;
return true;
@@ -83,7 +83,7 @@ namespace llvm {
public:
static char run;
static char ID;
- ModuleDNM() : ModulePass(&ID) {}
+ ModuleDNM() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
EXPECT_TRUE(getAnalysisIfAvailable<TargetData>());
run++;
@@ -119,7 +119,7 @@ namespace llvm {
EXPECT_TRUE(finalized);
EXPECT_EQ(run, runc);
}
- PassTestBase() : P(&ID), allocated(0) {
+ PassTestBase() : P(ID), allocated(0) {
initialized = false;
finalized = false;
runc = 0;
@@ -253,7 +253,7 @@ namespace llvm {
struct OnTheFlyTest: public ModulePass {
public:
static char ID;
- OnTheFlyTest() : ModulePass(&ID) {}
+ OnTheFlyTest() : ModulePass(ID) {}
virtual bool runOnModule(Module &M) {
EXPECT_TRUE(getAnalysisIfAvailable<TargetData>());
for (Module::iterator I=M.begin(),E=M.end(); I != E; ++I) {
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index e7cd713ce601..cd76d4404308 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -49,32 +49,32 @@ NoCanonicalizeWhiteSpace("strict-whitespace",
class Pattern {
SMLoc PatternLoc;
-
+
/// FixedStr - If non-empty, this pattern is a fixed string match with the
/// specified fixed string.
StringRef FixedStr;
-
+
/// RegEx - If non-empty, this is a regex pattern.
std::string RegExStr;
-
+
/// VariableUses - Entries in this vector map to uses of a variable in the
/// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
/// "foobaz" and we'll get an entry in this vector that tells us to insert the
/// value of bar at offset 3.
std::vector<std::pair<StringRef, unsigned> > VariableUses;
-
+
/// VariableDefs - Entries in this vector map to definitions of a variable in
/// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will
/// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The
/// index indicates what parenthesized value captures the variable value.
std::vector<std::pair<StringRef, unsigned> > VariableDefs;
-
+
public:
-
+
Pattern() { }
-
+
bool ParsePattern(StringRef PatternStr, SourceMgr &SM);
-
+
/// Match - Match the pattern string against the input buffer Buffer. This
/// returns the position that is matched or npos if there is no match. If
/// there is a match, the size of the matched string is returned in MatchLen.
@@ -103,19 +103,19 @@ private:
bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
PatternLoc = SMLoc::getFromPointer(PatternStr.data());
-
+
// Ignore trailing whitespace.
while (!PatternStr.empty() &&
(PatternStr.back() == ' ' || PatternStr.back() == '\t'))
PatternStr = PatternStr.substr(0, PatternStr.size()-1);
-
+
// Check that there is something on the line.
if (PatternStr.empty()) {
SM.PrintMessage(PatternLoc, "found empty check string with prefix '" +
CheckPrefix+":'", "error");
return true;
}
-
+
// Check to see if this is a fixed string, or if it has regex pieces.
if (PatternStr.size() < 2 ||
(PatternStr.find("{{") == StringRef::npos &&
@@ -123,18 +123,18 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
FixedStr = PatternStr;
return false;
}
-
+
// Paren value #0 is for the fully matched string. Any new parenthesized
// values add from their.
unsigned CurParen = 1;
-
+
// Otherwise, there is at least one regex piece. Build up the regex pattern
// by escaping scary characters in fixed strings, building up one big regex.
while (!PatternStr.empty()) {
// RegEx matches.
if (PatternStr.size() >= 2 &&
PatternStr[0] == '{' && PatternStr[1] == '{') {
-
+
// Otherwise, this is the start of a regex match. Scan for the }}.
size_t End = PatternStr.find("}}");
if (End == StringRef::npos) {
@@ -142,13 +142,13 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
"found start of regex string with no end '}}'", "error");
return true;
}
-
+
if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
return true;
PatternStr = PatternStr.substr(End+2);
continue;
}
-
+
// Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
// (or some other regex) and assigns it to the FileCheck variable 'foo'. The
// second form is [[foo]] which is a reference to foo. The variable name
@@ -163,14 +163,14 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
"invalid named regex reference, no ]] found", "error");
return true;
}
-
+
StringRef MatchStr = PatternStr.substr(2, End-2);
PatternStr = PatternStr.substr(End+2);
-
+
// Get the regex name (e.g. "foo").
size_t NameEnd = MatchStr.find(':');
StringRef Name = MatchStr.substr(0, NameEnd);
-
+
if (Name.empty()) {
SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
"invalid name in named regex: empty name", "error");
@@ -187,31 +187,31 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
"invalid name in named regex", "error");
return true;
}
-
+
// Name can't start with a digit.
if (isdigit(Name[0])) {
SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
"invalid name in named regex", "error");
return true;
}
-
+
// Handle [[foo]].
if (NameEnd == StringRef::npos) {
VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
continue;
}
-
+
// Handle [[foo:.*]].
VariableDefs.push_back(std::make_pair(Name, CurParen));
RegExStr += '(';
++CurParen;
-
+
if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
return true;
RegExStr += ')';
}
-
+
// Handle fixed string matches.
// Find the end, which is the start of the next regex.
size_t FixedMatchEnd = PatternStr.find("{{");
@@ -260,7 +260,7 @@ bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
"invalid regex: " + Error, "error");
return true;
}
-
+
RegExStr += RegexStr.str();
CurParen += R.getNumMatches();
return false;
@@ -278,14 +278,14 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
}
// Regex match.
-
+
// If there are variable uses, we need to create a temporary string with the
// actual value.
StringRef RegExToMatch = RegExStr;
std::string TmpStr;
if (!VariableUses.empty()) {
TmpStr = RegExStr;
-
+
unsigned InsertOffset = 0;
for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
StringMap<StringRef>::iterator it =
@@ -297,33 +297,33 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
// Look up the value and escape it so that we can plop it into the regex.
std::string Value;
AddFixedStringToRegEx(it->second, Value);
-
+
// Plop it into the regex at the adjusted offset.
TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
Value.begin(), Value.end());
InsertOffset += Value.size();
}
-
+
// Match the newly constructed regex.
RegExToMatch = TmpStr;
}
-
-
+
+
SmallVector<StringRef, 4> MatchInfo;
if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
return StringRef::npos;
-
+
// Successful regex match.
assert(!MatchInfo.empty() && "Didn't get any match");
StringRef FullMatch = MatchInfo[0];
-
+
// If this defines any variables, remember their values.
for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) {
assert(VariableDefs[i].second < MatchInfo.size() &&
"Internal paren error");
VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second];
}
-
+
MatchLen = FullMatch.size();
return FullMatch.data()-Buffer.data();
}
@@ -421,19 +421,19 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
struct CheckString {
/// Pat - The pattern to match.
Pattern Pat;
-
+
/// Loc - The location in the match file that the check string was specified.
SMLoc Loc;
-
+
/// IsCheckNext - This is true if this is a CHECK-NEXT: directive (as opposed
/// to a CHECK: directive.
bool IsCheckNext;
-
+
/// NotStrings - These are all of the strings that are disallowed from
/// occurring between this match string and the previous one (or start of
/// file).
std::vector<std::pair<SMLoc, Pattern> > NotStrings;
-
+
CheckString(const Pattern &P, SMLoc L, bool isCheckNext)
: Pat(P), Loc(L), IsCheckNext(isCheckNext) {}
};
@@ -443,7 +443,7 @@ struct CheckString {
static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
SmallString<128> NewFile;
NewFile.reserve(MB->getBufferSize());
-
+
for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
Ptr != End; ++Ptr) {
// If C is not a horizontal whitespace, skip it.
@@ -451,18 +451,18 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
NewFile.push_back(*Ptr);
continue;
}
-
+
// Otherwise, add one space and advance over neighboring space.
NewFile.push_back(' ');
while (Ptr+1 != End &&
(Ptr[1] == ' ' || Ptr[1] == '\t'))
++Ptr;
}
-
+
// Free the old buffer and return a new one.
MemoryBuffer *MB2 =
MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier());
-
+
delete MB;
return MB2;
}
@@ -477,37 +477,37 @@ static bool ReadCheckFile(SourceMgr &SM,
MemoryBuffer *F =
MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), &ErrorStr);
if (F == 0) {
- errs() << "Could not open check file '" << CheckFilename << "': "
+ errs() << "Could not open check file '" << CheckFilename << "': "
<< ErrorStr << '\n';
return true;
}
-
+
// If we want to canonicalize whitespace, strip excess whitespace from the
// buffer containing the CHECK lines.
if (!NoCanonicalizeWhiteSpace)
F = CanonicalizeInputFile(F);
-
+
SM.AddNewSourceBuffer(F, SMLoc());
// Find all instances of CheckPrefix followed by : in the file.
StringRef Buffer = F->getBuffer();
std::vector<std::pair<SMLoc, Pattern> > NotMatches;
-
+
while (1) {
// See if Prefix occurs in the memory buffer.
Buffer = Buffer.substr(Buffer.find(CheckPrefix));
-
+
// If we didn't find a match, we're done.
if (Buffer.empty())
break;
-
+
const char *CheckPrefixStart = Buffer.data();
-
+
// When we find a check prefix, keep track of whether we find CHECK: or
// CHECK-NEXT:
bool IsCheckNext = false, IsCheckNot = false;
-
+
// Verify that the : is present after the prefix.
if (Buffer[CheckPrefix.size()] == ':') {
Buffer = Buffer.substr(CheckPrefix.size()+1);
@@ -523,11 +523,11 @@ static bool ReadCheckFile(SourceMgr &SM,
Buffer = Buffer.substr(1);
continue;
}
-
+
// Okay, we found the prefix, yay. Remember the rest of the line, but
// ignore leading and trailing whitespace.
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
-
+
// Scan ahead to the end of line.
size_t EOL = Buffer.find_first_of("\n\r");
@@ -538,10 +538,10 @@ static bool ReadCheckFile(SourceMgr &SM,
Pattern P;
if (P.ParsePattern(Buffer.substr(0, EOL), SM))
return true;
-
+
Buffer = Buffer.substr(EOL);
-
+
// Verify that CHECK-NEXT lines have at least one CHECK line before them.
if (IsCheckNext && CheckStrings.empty()) {
SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
@@ -549,34 +549,34 @@ static bool ReadCheckFile(SourceMgr &SM,
CheckPrefix+ ": line", "error");
return true;
}
-
+
// Handle CHECK-NOT.
if (IsCheckNot) {
NotMatches.push_back(std::make_pair(SMLoc::getFromPointer(Buffer.data()),
P));
continue;
}
-
-
+
+
// Okay, add the string we captured to the output vector and move on.
CheckStrings.push_back(CheckString(P,
PatternLoc,
IsCheckNext));
std::swap(NotMatches, CheckStrings.back().NotStrings);
}
-
+
if (CheckStrings.empty()) {
errs() << "error: no check strings found with prefix '" << CheckPrefix
<< ":'\n";
return true;
}
-
+
if (!NotMatches.empty()) {
errs() << "error: '" << CheckPrefix
<< "-NOT:' not supported after last check line.\n";
return true;
}
-
+
return false;
}
@@ -586,11 +586,11 @@ static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
// Otherwise, we have an error, emit an error message.
SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
"error");
-
+
// Print the "scanning from here" line. If the current position is at the
// end of a line, advance to the start of the next line.
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
-
+
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here",
"note");
@@ -606,9 +606,9 @@ static unsigned CountNumNewlinesBetween(StringRef Range) {
// Scan for newline.
Range = Range.substr(Range.find_first_of("\n\r"));
if (Range.empty()) return NumNewLines;
-
+
++NumNewLines;
-
+
// Handle \n\r and \r\n as a single newline.
if (Range.size() > 1 &&
(Range[1] == '\n' || Range[1] == '\r') &&
@@ -624,7 +624,7 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv);
SourceMgr SM;
-
+
// Read the expected strings from the check file.
std::vector<CheckString> CheckStrings;
if (ReadCheckFile(SM, CheckStrings))
@@ -635,35 +635,35 @@ int main(int argc, char **argv) {
MemoryBuffer *F =
MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), &ErrorStr);
if (F == 0) {
- errs() << "Could not open input file '" << InputFilename << "': "
+ errs() << "Could not open input file '" << InputFilename << "': "
<< ErrorStr << '\n';
return true;
}
-
+
// Remove duplicate spaces in the input file if requested.
if (!NoCanonicalizeWhiteSpace)
F = CanonicalizeInputFile(F);
-
+
SM.AddNewSourceBuffer(F, SMLoc());
-
+
/// VariableTable - This holds all the current filecheck variables.
StringMap<StringRef> VariableTable;
-
+
// Check that we have all of the expected strings, in order, in the input
// file.
StringRef Buffer = F->getBuffer();
-
+
const char *LastMatch = Buffer.data();
-
+
for (unsigned StrNo = 0, e = CheckStrings.size(); StrNo != e; ++StrNo) {
const CheckString &CheckStr = CheckStrings[StrNo];
-
+
StringRef SearchFrom = Buffer;
-
+
// Find StrNo in the file.
size_t MatchLen = 0;
Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable));
-
+
// If we didn't find a match, reject the input.
if (Buffer.empty()) {
PrintCheckFailed(SM, CheckStr, SearchFrom, VariableTable);
@@ -690,7 +690,7 @@ int main(int argc, char **argv) {
"previous match was here", "note");
return 1;
}
-
+
if (NumNewLines != 1) {
SM.PrintMessage(CheckStr.Loc,
CheckPrefix+
@@ -703,7 +703,7 @@ int main(int argc, char **argv) {
return 1;
}
}
-
+
// If this match had "not strings", verify that they don't exist in the
// skipped region.
for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size();
@@ -713,20 +713,20 @@ int main(int argc, char **argv) {
MatchLen,
VariableTable);
if (Pos == StringRef::npos) continue;
-
+
SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
CheckPrefix+"-NOT: string occurred!", "error");
SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
CheckPrefix+"-NOT: pattern specified here", "note");
return 1;
}
-
+
// Otherwise, everything is good. Step over the matched text and remember
// the position after the match as the end of the last match.
Buffer = Buffer.substr(MatchLen);
LastMatch = Buffer.data();
}
-
+
return 0;
}
diff --git a/utils/FileUpdate/FileUpdate.cpp b/utils/FileUpdate/FileUpdate.cpp
index 00c20915fc9d..2cf366fa55f8 100644
--- a/utils/FileUpdate/FileUpdate.cpp
+++ b/utils/FileUpdate/FileUpdate.cpp
@@ -36,6 +36,11 @@ int main(int argc, char **argv) {
PrettyStackTraceProgram X(argc, argv);
cl::ParseCommandLineOptions(argc, argv);
+ if (OutputFilename == "-") {
+ errs() << argv[0] << ": error: Can't update standard output\n";
+ return 1;
+ }
+
// Get the input data.
std::string ErrorStr;
MemoryBuffer *In =
@@ -54,7 +59,7 @@ int main(int argc, char **argv) {
memcmp(In->getBufferStart(), Out->getBufferStart(),
Out->getBufferSize()) == 0) {
if (!Quiet)
- outs() << argv[0] << ": Not updating '" << OutputFilename
+ errs() << argv[0] << ": Not updating '" << OutputFilename
<< "', contents match input.\n";
return 0;
}
@@ -63,25 +68,20 @@ int main(int argc, char **argv) {
// Otherwise, overwrite the output.
if (!Quiet)
- outs() << argv[0] << ": Updating '" << OutputFilename
+ errs() << argv[0] << ": Updating '" << OutputFilename
<< "', contents changed.\n";
- raw_fd_ostream OutStream(OutputFilename.c_str(), ErrorStr,
- raw_fd_ostream::F_Binary);
+ tool_output_file OutStream(OutputFilename.c_str(), ErrorStr,
+ raw_fd_ostream::F_Binary);
if (!ErrorStr.empty()) {
errs() << argv[0] << ": Unable to write output '"
<< OutputFilename << "': " << ErrorStr << '\n';
return 1;
}
- OutStream.write(In->getBufferStart(), In->getBufferSize());
- OutStream.close();
+ OutStream.os().write(In->getBufferStart(), In->getBufferSize());
- if (OutStream.has_error()) {
- errs() << argv[0] << ": Could not open output file '"
- << OutputFilename << "': " << ErrorStr << '\n';
- OutStream.clear_error();
- return 1;
- }
+ // Declare success.
+ OutStream.keep();
return 0;
}
diff --git a/utils/Makefile b/utils/Makefile
index 000705ead235..1a4dcca8c5ee 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -8,14 +8,15 @@
##===----------------------------------------------------------------------===##
LEVEL = ..
-PARALLEL_DIRS := TableGen fpcmp PerfectShuffle FileCheck FileUpdate count not unittest
+PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \
+ count fpcmp llvm-lit not unittest
-EXTRA_DIST := cgiplotNLT.pl check-each-file codegen-diff countloc.sh cvsupdate \
+EXTRA_DIST := cgiplotNLT.pl check-each-file codegen-diff countloc.sh \
DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
getsrcs.sh importNLT.pl llvmdo llvmgrep llvm-native-gcc \
llvm-native-gxx makellvm NightlyTest.gnuplot NightlyTest.pl \
NightlyTestTemplate.html NLT.schema OldenDataRecover.pl \
- parseNLT.pl plotNLT.pl profile.pl RegressionFinder.pl userloc.pl \
+ parseNLT.pl plotNLT.pl profile.pl \
webNLT.pl vim
include $(LEVEL)/Makefile.common
diff --git a/utils/RegressionFinder.pl b/utils/RegressionFinder.pl
deleted file mode 100755
index 86b077780b0d..000000000000
--- a/utils/RegressionFinder.pl
+++ /dev/null
@@ -1,186 +0,0 @@
-#! /usr/bin/perl
-# Script to find regressions by binary-searching a time interval in the
-# CVS tree. Written by Brian Gaeke on 2-Mar-2004.
-#
-
-require 5.6.0; # NOTE: This script not tested with earlier versions.
-use Getopt::Std;
-use POSIX;
-use Time::Local;
-use IO::Handle;
-
-sub usage {
- print STDERR <<END;
-findRegression [-I] -w WTIME -d DTIME -t TOOLS -c SCRIPT
-
-The -w, -d, -t, and -c options are required.
-Run findRegression in the top level of an LLVM tree.
-WTIME is a time when you are sure the regression does NOT exist ("Works").
-DTIME is a time when you are sure the regression DOES exist ("Doesntwork").
-WTIME and DTIME are both in the format: "YYYY/MM/DD HH:MM".
--I means run builds at WTIME and DTIME first to make sure.
-TOOLS is a comma separated list of tools to rebuild before running SCRIPT.
-SCRIPT exits 1 if the regression is present in TOOLS; 0 otherwise.
-END
- exit 1;
-}
-
-sub timeAsSeconds {
- my ($timestr) = @_;
-
- if ( $timestr =~ /(\d\d\d\d)\/(\d\d)\/(\d\d) (\d\d):(\d\d)/ ) {
- my ( $year, $mon, $mday, $hour, $min ) = ( $1, $2, $3, $4, $5 );
- return timegm( 0, $min, $hour, $mday, $mon - 1, $year );
- }
- else {
- die "** Can't parse date + time: $timestr\n";
- }
-}
-
-sub timeAsString {
- my ($secs) = @_;
- return strftime( "%Y/%m/%d %H:%M", gmtime($secs) );
-}
-
-sub run {
- my ($cmdline) = @_;
- print LOG "** Running: $cmdline\n";
- return system($cmdline);
-}
-
-sub buildLibrariesAndTools {
- run("sh /home/vadve/gaeke/scripts/run-configure");
- run("$MAKE -C lib/Support");
- run("$MAKE -C utils");
- run("$MAKE -C lib");
- foreach my $tool (@TOOLS) { run("$MAKE -C tools/$tool"); }
-}
-
-sub contains {
- my ( $file, $regex ) = @_;
- local (*FILE);
- open( FILE, "<$file" ) or die "** can't read $file: $!\n";
- while (<FILE>) {
- if (/$regex/) {
- close FILE;
- return 1;
- }
- }
- close FILE;
- return 0;
-}
-
-sub updateSources {
- my ($time) = @_;
- my $inst = "include/llvm/Instruction.h";
- unlink($inst);
- run( "cvs update -D'" . timeAsString($time) . "'" );
- if ( !contains( $inst, 'class Instruction.*Annotable' ) ) {
- run("patch -F100 -p0 < makeInstructionAnnotable.patch");
- }
-}
-
-sub regressionPresentAt {
- my ($time) = @_;
-
- updateSources($time);
- buildLibrariesAndTools();
- my $rc = run($SCRIPT);
- if ($rc) {
- print LOG "** Found that regression was PRESENT at "
- . timeAsString($time) . "\n";
- return 1;
- }
- else {
- print LOG "** Found that regression was ABSENT at "
- . timeAsString($time) . "\n";
- return 0;
- }
-}
-
-sub regressionAbsentAt {
- my ($time) = @_;
- return !regressionPresentAt($time);
-}
-
-sub closeTo {
- my ( $time1, $time2 ) = @_;
- return abs( $time1 - $time2 ) < 600; # 10 minutes seems reasonable.
-}
-
-sub halfWayPoint {
- my ( $time1, $time2 ) = @_;
- my $halfSpan = int( abs( $time1 - $time2 ) / 2 );
- if ( $time1 < $time2 ) {
- return $time1 + $halfSpan;
- }
- else {
- return $time2 + $halfSpan;
- }
-}
-
-sub checkBoundaryConditions {
- print LOG "** Checking for presence of regression at ", timeAsString($DTIME),
- "\n";
- if ( !regressionPresentAt($DTIME) ) {
- die ( "** Can't help you; $SCRIPT says regression absent at dtime: "
- . timeAsString($DTIME)
- . "\n" );
- }
- print LOG "** Checking for absence of regression at ", timeAsString($WTIME),
- "\n";
- if ( !regressionAbsentAt($WTIME) ) {
- die ( "** Can't help you; $SCRIPT says regression present at wtime: "
- . timeAsString($WTIME)
- . "\n" );
- }
-}
-
-##############################################################################
-
-# Set up log files
-open (STDERR, ">&STDOUT") || die "** Can't redirect std.err: $!\n";
-autoflush STDOUT 1;
-autoflush STDERR 1;
-open (LOG, ">RegFinder.log") || die "** can't write RegFinder.log: $!\n";
-autoflush LOG 1;
-# Check command line arguments and environment variables
-getopts('Iw:d:t:c:');
-if ( !( $opt_w && $opt_d && $opt_t && $opt_c ) ) {
- usage;
-}
-$MAKE = $ENV{'MAKE'};
-$MAKE = 'gmake' unless $MAKE;
-$WTIME = timeAsSeconds($opt_w);
-print LOG "** Assuming worked at ", timeAsString($WTIME), "\n";
-$DTIME = timeAsSeconds($opt_d);
-print LOG "** Assuming didn't work at ", timeAsString($DTIME), "\n";
-$opt_t =~ s/\s*//g;
-$SCRIPT = $opt_c;
-die "** $SCRIPT is not executable or not found\n" unless -x $SCRIPT;
-print LOG "** Checking for the regression using $SCRIPT\n";
-@TOOLS = split ( /,/, $opt_t );
-print LOG (
- "** Going to rebuild: ",
- ( join ", ", @TOOLS ),
- " before each $SCRIPT run\n"
-);
-if ($opt_I) { checkBoundaryConditions(); }
-# do the dirty work:
-while ( !closeTo( $DTIME, $WTIME ) ) {
- my $halfPt = halfWayPoint( $DTIME, $WTIME );
- print LOG "** Checking whether regression is present at ",
- timeAsString($halfPt), "\n";
- if ( regressionPresentAt($halfPt) ) {
- $DTIME = $halfPt;
- }
- else {
- $WTIME = $halfPt;
- }
-}
-# Tell them what we found
-print LOG "** Narrowed it down to:\n";
-print LOG "** Worked at: ", timeAsString($WTIME), "\n";
-print LOG "** Did not work at: ", timeAsString($DTIME), "\n";
-close LOG;
-exit 0;
diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp
index 50256919bbe6..03b01f6bdb74 100644
--- a/utils/TableGen/ARMDecoderEmitter.cpp
+++ b/utils/TableGen/ARMDecoderEmitter.cpp
@@ -49,36 +49,35 @@ using namespace llvm;
ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_EXTFRM, 13) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \
- ENTRY(ARM_FORMAT_THUMBFRM, 24) \
- ENTRY(ARM_FORMAT_NEONFRM, 25) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \
- ENTRY(ARM_FORMAT_MISCFRM, 29) \
- ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \
- ENTRY(ARM_FORMAT_NLdSt, 31) \
- ENTRY(ARM_FORMAT_N1RegModImm, 32) \
- ENTRY(ARM_FORMAT_N2Reg, 33) \
- ENTRY(ARM_FORMAT_NVCVT, 34) \
- ENTRY(ARM_FORMAT_NVecDupLn, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 36) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 37) \
- ENTRY(ARM_FORMAT_N3Reg, 38) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 39) \
- ENTRY(ARM_FORMAT_NVecExtract, 40) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 41) \
- ENTRY(ARM_FORMAT_NVTBL, 42)
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
// ARM instruction format specifies the encoding used by the instruction.
#define ENTRY(n, v) n = v,
@@ -1584,7 +1583,7 @@ bool ARMDecoderEmitter::ARMDEBackend::populateInstruction(
Name == "MOVr_TC")
return false;
- // VLDMQ/VSTMQ can be hanlded with the more generic VLDMD/VSTMD.
+ // VLDMQ/VSTMQ can be handled with the more generic VLDMD/VSTMD.
if (Name == "VLDMQ" || Name == "VLDMQ_UPD" ||
Name == "VSTMQ" || Name == "VSTMQ_UPD")
return false;
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index e1aa2bc70f6c..558398648d2c 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -199,6 +199,14 @@ static void TokenizeAsmString(StringRef AsmString,
break;
}
+ case '.':
+ if (InTok) {
+ Tokens.push_back(AsmString.slice(Prev, i));
+ }
+ Prev = i;
+ InTok = true;
+ break;
+
default:
InTok = true;
}
@@ -260,9 +268,12 @@ static bool IsAssemblerInstruction(StringRef Name,
}
if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) {
- std::string Err = "'" + Name.str() + "': " +
- "invalid assembler instruction; tied operand '" + Tokens[i].str() + "'";
- throw TGError(CGI.TheDef->getLoc(), Err);
+ DEBUG({
+ errs() << "warning: '" << Name << "': "
+ << "ignoring instruction with tied operand '"
+ << Tokens[i].str() << "'\n";
+ });
+ return false;
}
}
@@ -271,6 +282,8 @@ static bool IsAssemblerInstruction(StringRef Name,
namespace {
+struct SubtargetFeatureInfo;
+
/// ClassInfo - Helper class for storing the information about a particular
/// class of operands which can be matched.
struct ClassInfo {
@@ -444,6 +457,9 @@ struct InstructionInfo {
/// Operands - The operands that this instruction matches.
SmallVector<Operand, 4> Operands;
+ /// Predicates - The required subtarget features to match this instruction.
+ SmallVector<SubtargetFeatureInfo*, 4> RequiredFeatures;
+
/// ConversionFnKind - The enum value which is passed to the generated
/// ConvertToMCInst to convert parsed operands into an MCInst for this
/// function.
@@ -505,6 +521,19 @@ public:
void dump();
};
+/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
+/// feature which participates in instruction matching.
+struct SubtargetFeatureInfo {
+ /// \brief The predicate record for this feature.
+ Record *TheDef;
+
+ /// \brief An unique index assigned to represent this feature.
+ unsigned Index;
+
+ /// \brief The name of the enumerated constant identifying this feature.
+ std::string EnumName;
+};
+
class AsmMatcherInfo {
public:
/// The tablegen AsmParser record.
@@ -525,6 +554,9 @@ public:
/// Map of Register records to their class information.
std::map<Record*, ClassInfo*> RegisterClasses;
+ /// Map of Predicate records to their subtarget information.
+ std::map<Record*, SubtargetFeatureInfo*> SubtargetFeatures;
+
private:
/// Map of token to class information which has already been constructed.
std::map<std::string, ClassInfo*> TokenClasses;
@@ -543,6 +575,23 @@ private:
ClassInfo *getOperandClass(StringRef Token,
const CodeGenInstruction::OperandInfo &OI);
+ /// getSubtargetFeature - Lookup or create the subtarget feature info for the
+ /// given operand.
+ SubtargetFeatureInfo *getSubtargetFeature(Record *Def) {
+ assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!");
+
+ SubtargetFeatureInfo *&Entry = SubtargetFeatures[Def];
+ if (!Entry) {
+ Entry = new SubtargetFeatureInfo;
+ Entry->TheDef = Def;
+ Entry->Index = SubtargetFeatures.size() - 1;
+ Entry->EnumName = "Feature_" + Def->getName();
+ assert(Entry->Index < 32 && "Too many subtarget features!");
+ }
+
+ return Entry;
+ }
+
/// BuildRegisterClasses - Build the ClassInfo* instances for register
/// classes.
void BuildRegisterClasses(CodeGenTarget &Target,
@@ -903,7 +952,31 @@ void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) {
}
}
}
-
+
+ // Compute the require features.
+ ListInit *Predicates = CGI.TheDef->getValueAsListInit("Predicates");
+ for (unsigned i = 0, e = Predicates->getSize(); i != e; ++i) {
+ if (DefInit *Pred = dynamic_cast<DefInit*>(Predicates->getElement(i))) {
+ // Ignore OptForSize and OptForSpeed, they aren't really requirements,
+ // rather they are hints to isel.
+ //
+ // FIXME: Find better way to model this.
+ if (Pred->getDef()->getName() == "OptForSize" ||
+ Pred->getDef()->getName() == "OptForSpeed")
+ continue;
+
+ // FIXME: Total hack; for now, we just limit ourselves to In32BitMode
+ // and In64BitMode, because we aren't going to have the right feature
+ // masks for SSE and friends. We need to decide what we are going to do
+ // about CPU subtypes to implement this the right way.
+ if (Pred->getDef()->getName() != "In32BitMode" &&
+ Pred->getDef()->getName() != "In64BitMode")
+ continue;
+
+ II->RequiredFeatures.push_back(getSubtargetFeature(Pred->getDef()));
+ }
+ }
+
Instructions.push_back(II.take());
}
@@ -1499,6 +1572,48 @@ static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser,
OS << "}\n\n";
}
+/// EmitSubtargetFeatureFlagEnumeration - Emit the subtarget feature flag
+/// definitions.
+static void EmitSubtargetFeatureFlagEnumeration(CodeGenTarget &Target,
+ AsmMatcherInfo &Info,
+ raw_ostream &OS) {
+ OS << "// Flags for subtarget features that participate in "
+ << "instruction matching.\n";
+ OS << "enum SubtargetFeatureFlag {\n";
+ for (std::map<Record*, SubtargetFeatureInfo*>::const_iterator
+ it = Info.SubtargetFeatures.begin(),
+ ie = Info.SubtargetFeatures.end(); it != ie; ++it) {
+ SubtargetFeatureInfo &SFI = *it->second;
+ OS << " " << SFI.EnumName << " = (1 << " << SFI.Index << "),\n";
+ }
+ OS << " Feature_None = 0\n";
+ OS << "};\n\n";
+}
+
+/// EmitComputeAvailableFeatures - Emit the function to compute the list of
+/// available features given a subtarget.
+static void EmitComputeAvailableFeatures(CodeGenTarget &Target,
+ AsmMatcherInfo &Info,
+ raw_ostream &OS) {
+ std::string ClassName =
+ Info.AsmParser->getValueAsString("AsmParserClassName");
+
+ OS << "unsigned " << Target.getName() << ClassName << "::\n"
+ << "ComputeAvailableFeatures(const " << Target.getName()
+ << "Subtarget *Subtarget) const {\n";
+ OS << " unsigned Features = 0;\n";
+ for (std::map<Record*, SubtargetFeatureInfo*>::const_iterator
+ it = Info.SubtargetFeatures.begin(),
+ ie = Info.SubtargetFeatures.end(); it != ie; ++it) {
+ SubtargetFeatureInfo &SFI = *it->second;
+ OS << " if (" << SFI.TheDef->getValueAsString("CondString")
+ << ")\n";
+ OS << " Features |= " << SFI.EnumName << ";\n";
+ }
+ OS << " return Features;\n";
+ OS << "}\n\n";
+}
+
void AsmMatcherEmitter::run(raw_ostream &OS) {
CodeGenTarget Target;
Record *AsmParser = Target.getAsmParser();
@@ -1550,6 +1665,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
+ // Emit the subtarget feature enumeration.
+ EmitSubtargetFeatureFlagEnumeration(Target, Info, OS);
+
// Emit the function to match a register name to number.
EmitMatchRegisterName(Target, AsmParser, OS);
@@ -1570,6 +1688,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Emit the subclass predicate routine.
EmitIsSubclass(Target, Info.Classes, OS);
+ // Emit the available features compute function.
+ EmitComputeAvailableFeatures(Target, Info, OS);
+
// Finally, build the match function.
size_t MaxNumOperands = 0;
@@ -1578,13 +1699,10 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
it != ie; ++it)
MaxNumOperands = std::max(MaxNumOperands, (*it)->Operands.size());
- const std::string &MatchName =
- AsmParser->getValueAsString("MatchInstructionName");
OS << "bool " << Target.getName() << ClassName << "::\n"
- << MatchName
- << "(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
- OS.indent(MatchName.size() + 1);
- OS << "MCInst &Inst) {\n";
+ << "MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>"
+ << " &Operands,\n";
+ OS << " MCInst &Inst) {\n";
// Emit the static match table; unused classes get initalized to 0 which is
// guaranteed to be InvalidMatchClass.
@@ -1600,6 +1718,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " unsigned Opcode;\n";
OS << " ConversionKind ConvertFn;\n";
OS << " MatchClassKind Classes[" << MaxNumOperands << "];\n";
+ OS << " unsigned RequiredFeatures;\n";
OS << " } MatchTable[" << Info.Instructions.size() << "] = {\n";
for (std::vector<InstructionInfo*>::const_iterator it =
@@ -1615,11 +1734,27 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (i) OS << ", ";
OS << Op.Class->Name;
}
- OS << " } },\n";
+ OS << " }, ";
+
+ // Write the required features mask.
+ if (!II.RequiredFeatures.empty()) {
+ for (unsigned i = 0, e = II.RequiredFeatures.size(); i != e; ++i) {
+ if (i) OS << "|";
+ OS << II.RequiredFeatures[i]->EnumName;
+ }
+ } else
+ OS << "0";
+
+ OS << "},\n";
}
OS << " };\n\n";
+
+ // Emit code to get the available features.
+ OS << " // Get the current feature set.\n";
+ OS << " unsigned AvailableFeatures = getAvailableFeatures();\n\n";
+
// Emit code to compute the class list for this operand vector.
OS << " // Eliminate obvious mismatches.\n";
OS << " if (Operands.size() > " << MaxNumOperands << ")\n";
@@ -1645,6 +1780,13 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " for (const MatchEntry *it = MatchTable, "
<< "*ie = MatchTable + " << Info.Instructions.size()
<< "; it != ie; ++it) {\n";
+
+ // Emit check that the required features are available.
+ OS << " if ((AvailableFeatures & it->RequiredFeatures) "
+ << "!= it->RequiredFeatures)\n";
+ OS << " continue;\n";
+
+ // Emit check that the subclasses match.
for (unsigned i = 0; i != MaxNumOperands; ++i) {
OS << " if (!IsSubclass(Classes["
<< i << "], it->Classes[" << i << "]))\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 1e95467a3b2c..23f13c2ae2d4 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -115,7 +115,7 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
const AsmWriterInst *Inst = getAsmWriterInstByID(i);
- if (Inst == 0) continue; // PHI, INLINEASM, DBG_LABEL, etc.
+ if (Inst == 0) continue; // PHI, INLINEASM, PROLOG_LABEL, etc.
std::string Command;
if (Inst->Operands.empty())
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index 28ba2ed49fc1..7643609b8724 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -169,6 +169,8 @@ void CallingConvEmitter::EmitAction(Record *Action,
else
O << "\n" << IndentStr << " State.getTarget().getTargetData()"
"->getABITypeAlignment(LocVT.getTypeForEVT(State.getContext()))";
+ if (Action->isSubClassOf("CCAssignToStackWithShadow"))
+ O << ", " << getQualifiedName(Action->getValueAsDef("ShadowReg"));
O << ");\n" << IndentStr
<< "State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset"
<< Counter << ", LocVT, LocInfo));\n";
diff --git a/utils/TableGen/ClangAttrEmitter.cpp b/utils/TableGen/ClangAttrEmitter.cpp
index fbdd2a7b2fbe..8d3399a95970 100644
--- a/utils/TableGen/ClangAttrEmitter.cpp
+++ b/utils/TableGen/ClangAttrEmitter.cpp
@@ -13,10 +13,444 @@
#include "ClangAttrEmitter.h"
#include "Record.h"
+#include "llvm/ADT/StringSwitch.h"
#include <algorithm>
+#include <cctype>
using namespace llvm;
+static const std::vector<StringRef> getValueAsListOfStrings(Record &R,
+ StringRef FieldName) {
+ ListInit *List = R.getValueAsListInit(FieldName);
+ assert (List && "Got a null ListInit");
+
+ std::vector<StringRef> Strings;
+ Strings.reserve(List->getSize());
+
+ for (ListInit::iterator i = List->begin(), e = List->end(); i != e; ++i) {
+ assert(*i && "Got a null element in a ListInit");
+ if (StringInit *S = dynamic_cast<StringInit *>(*i))
+ Strings.push_back(S->getValue());
+ else if (CodeInit *C = dynamic_cast<CodeInit *>(*i))
+ Strings.push_back(C->getValue());
+ else
+ assert(false && "Got a non-string, non-code element in a ListInit");
+ }
+
+ return Strings;
+}
+
+std::string ReadPCHRecord(StringRef type) {
+ return StringSwitch<std::string>(type)
+ .EndsWith("Decl *", "cast_or_null<" + std::string(type, 0, type.size()-1) +
+ ">(GetDecl(Record[Idx++]))")
+ .Case("QualType", "ReadTypeRecord(Idx++)")
+ .Default("Record[Idx++]");
+}
+
+// Assumes that the way to get the value is SA->getname()
+std::string WritePCHRecord(StringRef type, StringRef name) {
+ return StringSwitch<std::string>(type)
+ .EndsWith("Decl *", "AddDeclRef(" + std::string(name) +
+ ", Record);\n")
+ .Case("QualType", "AddTypeRef(" + std::string(name) + ", Record);\n")
+ .Default("Record.push_back(" + std::string(name) + ");\n");
+}
+
+namespace {
+ class Argument {
+ std::string lowerName, upperName;
+ StringRef attrName;
+
+ public:
+ Argument(Record &Arg, StringRef Attr)
+ : lowerName(Arg.getValueAsString("Name")), upperName(lowerName),
+ attrName(Attr) {
+ if (!lowerName.empty()) {
+ lowerName[0] = std::tolower(lowerName[0]);
+ upperName[0] = std::toupper(upperName[0]);
+ }
+ }
+ virtual ~Argument() {}
+
+ StringRef getLowerName() const { return lowerName; }
+ StringRef getUpperName() const { return upperName; }
+ StringRef getAttrName() const { return attrName; }
+
+ // These functions print the argument contents formatted in different ways.
+ virtual void writeAccessors(raw_ostream &OS) const = 0;
+ virtual void writeAccessorDefinitions(raw_ostream &OS) const {}
+ virtual void writeCloneArgs(raw_ostream &OS) const = 0;
+ virtual void writeCtorBody(raw_ostream &OS) const {}
+ virtual void writeCtorInitializers(raw_ostream &OS) const = 0;
+ virtual void writeCtorParameters(raw_ostream &OS) const = 0;
+ virtual void writeDeclarations(raw_ostream &OS) const = 0;
+ virtual void writePCHReadArgs(raw_ostream &OS) const = 0;
+ virtual void writePCHReadDecls(raw_ostream &OS) const = 0;
+ virtual void writePCHWrite(raw_ostream &OS) const = 0;
+ };
+
+ class SimpleArgument : public Argument {
+ std::string type;
+
+ public:
+ SimpleArgument(Record &Arg, StringRef Attr, std::string T)
+ : Argument(Arg, Attr), type(T)
+ {}
+
+ void writeAccessors(raw_ostream &OS) const {
+ OS << " " << type << " get" << getUpperName() << "() const {\n";
+ OS << " return " << getLowerName() << ";\n";
+ OS << " }";
+ }
+ void writeCloneArgs(raw_ostream &OS) const {
+ OS << getLowerName();
+ }
+ void writeCtorInitializers(raw_ostream &OS) const {
+ OS << getLowerName() << "(" << getUpperName() << ")";
+ }
+ void writeCtorParameters(raw_ostream &OS) const {
+ OS << type << " " << getUpperName();
+ }
+ void writeDeclarations(raw_ostream &OS) const {
+ OS << type << " " << getLowerName() << ";";
+ }
+ void writePCHReadDecls(raw_ostream &OS) const {
+ std::string read = ReadPCHRecord(type);
+ OS << " " << type << " " << getLowerName() << " = " << read << ";\n";
+ }
+ void writePCHReadArgs(raw_ostream &OS) const {
+ OS << getLowerName();
+ }
+ void writePCHWrite(raw_ostream &OS) const {
+ OS << " " << WritePCHRecord(type, "SA->get" +
+ std::string(getUpperName()) + "()");
+ }
+ };
+
+ class StringArgument : public Argument {
+ public:
+ StringArgument(Record &Arg, StringRef Attr)
+ : Argument(Arg, Attr)
+ {}
+
+ void writeAccessors(raw_ostream &OS) const {
+ OS << " llvm::StringRef get" << getUpperName() << "() const {\n";
+ OS << " return llvm::StringRef(" << getLowerName() << ", "
+ << getLowerName() << "Length);\n";
+ OS << " }\n";
+ OS << " unsigned get" << getUpperName() << "Length() const {\n";
+ OS << " return " << getLowerName() << "Length;\n";
+ OS << " }\n";
+ OS << " void set" << getUpperName()
+ << "(ASTContext &C, llvm::StringRef S) {\n";
+ OS << " " << getLowerName() << "Length = S.size();\n";
+ OS << " this->" << getLowerName() << " = new (C, 1) char ["
+ << getLowerName() << "Length];\n";
+ OS << " std::memcpy(this->" << getLowerName() << ", S.data(), "
+ << getLowerName() << "Length);\n";
+ OS << " }";
+ }
+ void writeCloneArgs(raw_ostream &OS) const {
+ OS << "get" << getUpperName() << "()";
+ }
+ void writeCtorBody(raw_ostream &OS) const {
+ OS << " std::memcpy(" << getLowerName() << ", " << getUpperName()
+ << ".data(), " << getLowerName() << "Length);";
+ }
+ void writeCtorInitializers(raw_ostream &OS) const {
+ OS << getLowerName() << "Length(" << getUpperName() << ".size()),"
+ << getLowerName() << "(new (Ctx, 1) char[" << getLowerName()
+ << "Length])";
+ }
+ void writeCtorParameters(raw_ostream &OS) const {
+ OS << "llvm::StringRef " << getUpperName();
+ }
+ void writeDeclarations(raw_ostream &OS) const {
+ OS << "unsigned " << getLowerName() << "Length;\n";
+ OS << "char *" << getLowerName() << ";";
+ }
+ void writePCHReadDecls(raw_ostream &OS) const {
+ OS << " std::string " << getLowerName() << "= ReadString(Record, Idx);\n";
+ }
+ void writePCHReadArgs(raw_ostream &OS) const {
+ OS << getLowerName();
+ }
+ void writePCHWrite(raw_ostream &OS) const {
+ OS << " AddString(SA->get" << getUpperName() << "(), Record);\n";
+ }
+ };
+
+ class AlignedArgument : public Argument {
+ public:
+ AlignedArgument(Record &Arg, StringRef Attr)
+ : Argument(Arg, Attr)
+ {}
+
+ void writeAccessors(raw_ostream &OS) const {
+ OS << " bool is" << getUpperName() << "Dependent() const;\n";
+
+ OS << " unsigned get" << getUpperName() << "(ASTContext &Ctx) const;\n";
+
+ OS << " bool is" << getUpperName() << "Expr() const {\n";
+ OS << " return is" << getLowerName() << "Expr;\n";
+ OS << " }\n";
+
+ OS << " Expr *get" << getUpperName() << "Expr() const {\n";
+ OS << " assert(is" << getLowerName() << "Expr);\n";
+ OS << " return " << getLowerName() << "Expr;\n";
+ OS << " }\n";
+
+ OS << " TypeSourceInfo *get" << getUpperName() << "Type() const {\n";
+ OS << " assert(!is" << getLowerName() << "Expr);\n";
+ OS << " return " << getLowerName() << "Type;\n";
+ OS << " }";
+ }
+ void writeAccessorDefinitions(raw_ostream &OS) const {
+ OS << "bool " << getAttrName() << "Attr::is" << getUpperName()
+ << "Dependent() const {\n";
+ OS << " if (is" << getLowerName() << "Expr)\n";
+ OS << " return " << getLowerName() << "Expr && (" << getLowerName()
+ << "Expr->isValueDependent() || " << getLowerName()
+ << "Expr->isTypeDependent());\n";
+ OS << " else\n";
+ OS << " return " << getLowerName()
+ << "Type->getType()->isDependentType();\n";
+ OS << "}\n";
+
+ // FIXME: Do not do the calculation here
+ // FIXME: Handle types correctly
+ // A null pointer means maximum alignment
+ // FIXME: Load the platform-specific maximum alignment, rather than
+ // 16, the x86 max.
+ OS << "unsigned " << getAttrName() << "Attr::get" << getUpperName()
+ << "(ASTContext &Ctx) const {\n";
+ OS << " assert(!is" << getUpperName() << "Dependent());\n";
+ OS << " if (is" << getLowerName() << "Expr)\n";
+ OS << " return (" << getLowerName() << "Expr ? " << getLowerName()
+ << "Expr->EvaluateAsInt(Ctx).getZExtValue() : 16)"
+ << "* Ctx.getCharWidth();\n";
+ OS << " else\n";
+ OS << " return 0; // FIXME\n";
+ OS << "}\n";
+ }
+ void writeCloneArgs(raw_ostream &OS) const {
+ OS << "is" << getLowerName() << "Expr, is" << getLowerName()
+ << "Expr ? static_cast<void*>(" << getLowerName()
+ << "Expr) : " << getLowerName()
+ << "Type";
+ }
+ void writeCtorBody(raw_ostream &OS) const {
+ OS << " if (is" << getLowerName() << "Expr)\n";
+ OS << " " << getLowerName() << "Expr = reinterpret_cast<Expr *>("
+ << getUpperName() << ");\n";
+ OS << " else\n";
+ OS << " " << getLowerName()
+ << "Type = reinterpret_cast<TypeSourceInfo *>(" << getUpperName()
+ << ");";
+ }
+ void writeCtorInitializers(raw_ostream &OS) const {
+ OS << "is" << getLowerName() << "Expr(Is" << getUpperName() << "Expr)";
+ }
+ void writeCtorParameters(raw_ostream &OS) const {
+ OS << "bool Is" << getUpperName() << "Expr, void *" << getUpperName();
+ }
+ void writeDeclarations(raw_ostream &OS) const {
+ OS << "bool is" << getLowerName() << "Expr;\n";
+ OS << "union {\n";
+ OS << "Expr *" << getLowerName() << "Expr;\n";
+ OS << "TypeSourceInfo *" << getLowerName() << "Type;\n";
+ OS << "};";
+ }
+ void writePCHReadArgs(raw_ostream &OS) const {
+ OS << "is" << getLowerName() << "Expr, " << getLowerName() << "Ptr";
+ }
+ void writePCHReadDecls(raw_ostream &OS) const {
+ OS << " bool is" << getLowerName() << "Expr = Record[Idx++];\n";
+ OS << " void *" << getLowerName() << "Ptr;\n";
+ OS << " if (is" << getLowerName() << "Expr)\n";
+ OS << " " << getLowerName() << "Ptr = ReadExpr(DeclsCursor);\n";
+ OS << " else\n";
+ OS << " " << getLowerName()
+ << "Ptr = GetTypeSourceInfo(DeclsCursor, Record, Idx);\n";
+ }
+ void writePCHWrite(raw_ostream &OS) const {
+ OS << " Record.push_back(SA->is" << getUpperName() << "Expr());\n";
+ OS << " if (SA->is" << getUpperName() << "Expr())\n";
+ OS << " AddStmt(SA->get" << getUpperName() << "Expr());\n";
+ OS << " else\n";
+ OS << " AddTypeSourceInfo(SA->get" << getUpperName()
+ << "Type(), Record);\n";
+ }
+ };
+
+ class VariadicArgument : public Argument {
+ std::string type;
+
+ public:
+ VariadicArgument(Record &Arg, StringRef Attr, std::string T)
+ : Argument(Arg, Attr), type(T)
+ {}
+
+ std::string getType() const { return type; }
+
+ void writeAccessors(raw_ostream &OS) const {
+ OS << " typedef " << type << "* " << getLowerName() << "_iterator;\n";
+ OS << " " << getLowerName() << "_iterator " << getLowerName()
+ << "_begin() const {\n";
+ OS << " return " << getLowerName() << ";\n";
+ OS << " }\n";
+ OS << " " << getLowerName() << "_iterator " << getLowerName()
+ << "_end() const {\n";
+ OS << " return " << getLowerName() << " + " << getLowerName()
+ << "Size;\n";
+ OS << " }\n";
+ OS << " unsigned " << getLowerName() << "_size() const {\n"
+ << " return " << getLowerName() << "Size;\n;";
+ OS << " }";
+ }
+ void writeCloneArgs(raw_ostream &OS) const {
+ OS << getLowerName() << ", " << getLowerName() << "Size";
+ }
+ void writeCtorBody(raw_ostream &OS) const {
+ // FIXME: memcpy is not safe on non-trivial types.
+ OS << " std::memcpy(" << getLowerName() << ", " << getUpperName()
+ << ", " << getLowerName() << "Size * sizeof(" << getType() << "));\n";
+ }
+ void writeCtorInitializers(raw_ostream &OS) const {
+ OS << getLowerName() << "Size(" << getUpperName() << "Size), "
+ << getLowerName() << "(new (Ctx, 16) " << getType() << "["
+ << getLowerName() << "Size])";
+ }
+ void writeCtorParameters(raw_ostream &OS) const {
+ OS << getType() << " *" << getUpperName() << ", unsigned "
+ << getUpperName() << "Size";
+ }
+ void writeDeclarations(raw_ostream &OS) const {
+ OS << " unsigned " << getLowerName() << "Size;\n";
+ OS << " " << getType() << " *" << getLowerName() << ";";
+ }
+ void writePCHReadDecls(raw_ostream &OS) const {
+ OS << " unsigned " << getLowerName() << "Size = Record[Idx++];\n";
+ OS << " llvm::SmallVector<" << type << ", 4> " << getLowerName()
+ << ";\n";
+ OS << " " << getLowerName() << ".reserve(" << getLowerName()
+ << "Size);\n";
+ OS << " for (unsigned i = " << getLowerName() << "Size; i; --i)\n";
+
+ std::string read = ReadPCHRecord(type);
+ OS << " " << getLowerName() << ".push_back(" << read << ");\n";
+ }
+ void writePCHReadArgs(raw_ostream &OS) const {
+ OS << getLowerName() << ".data(), " << getLowerName() << "Size";
+ }
+ void writePCHWrite(raw_ostream &OS) const{
+ OS << " Record.push_back(SA->" << getLowerName() << "_size());\n";
+ OS << " for (" << getAttrName() << "Attr::" << getLowerName()
+ << "_iterator i = SA->" << getLowerName() << "_begin(), e = SA->"
+ << getLowerName() << "_end(); i != e; ++i)\n";
+ OS << " " << WritePCHRecord(type, "(*i)");
+ }
+ };
+
+ class EnumArgument : public Argument {
+ std::string type;
+ std::vector<StringRef> values, enums;
+ public:
+ EnumArgument(Record &Arg, StringRef Attr)
+ : Argument(Arg, Attr), type(Arg.getValueAsString("Type")),
+ values(getValueAsListOfStrings(Arg, "Values")),
+ enums(getValueAsListOfStrings(Arg, "Enums"))
+ {}
+
+ void writeAccessors(raw_ostream &OS) const {
+ OS << " " << type << " get" << getUpperName() << "() const {\n";
+ OS << " return " << getLowerName() << ";\n";
+ OS << " }";
+ }
+ void writeCloneArgs(raw_ostream &OS) const {
+ OS << getLowerName();
+ }
+ void writeCtorInitializers(raw_ostream &OS) const {
+ OS << getLowerName() << "(" << getUpperName() << ")";
+ }
+ void writeCtorParameters(raw_ostream &OS) const {
+ OS << type << " " << getUpperName();
+ }
+ void writeDeclarations(raw_ostream &OS) const {
+ // Calculate the various enum values
+ std::vector<StringRef> uniques(enums);
+ std::sort(uniques.begin(), uniques.end());
+ uniques.erase(std::unique(uniques.begin(), uniques.end()),
+ uniques.end());
+ // FIXME: Emit a proper error
+ assert(!uniques.empty());
+
+ std::vector<StringRef>::iterator i = uniques.begin(),
+ e = uniques.end();
+ // The last one needs to not have a comma.
+ --e;
+
+ OS << "public:\n";
+ OS << " enum " << type << " {\n";
+ for (; i != e; ++i)
+ OS << " " << *i << ",\n";
+ OS << " " << *e << "\n";
+ OS << " };\n";
+ OS << "private:\n";
+ OS << " " << type << " " << getLowerName() << ";";
+ }
+ void writePCHReadDecls(raw_ostream &OS) const {
+ OS << " " << getAttrName() << "Attr::" << type << " " << getLowerName()
+ << "(static_cast<" << getAttrName() << "Attr::" << type
+ << ">(Record[Idx++]));\n";
+ }
+ void writePCHReadArgs(raw_ostream &OS) const {
+ OS << getLowerName();
+ }
+ void writePCHWrite(raw_ostream &OS) const {
+ OS << "Record.push_back(SA->get" << getUpperName() << "());\n";
+ }
+ };
+}
+
+static Argument *createArgument(Record &Arg, StringRef Attr,
+ Record *Search = 0) {
+ if (!Search)
+ Search = &Arg;
+
+ Argument *Ptr = 0;
+ llvm::StringRef ArgName = Search->getName();
+
+ if (ArgName == "AlignedArgument") Ptr = new AlignedArgument(Arg, Attr);
+ else if (ArgName == "EnumArgument") Ptr = new EnumArgument(Arg, Attr);
+ else if (ArgName == "ExprArgument") Ptr = new SimpleArgument(Arg, Attr,
+ "Expr *");
+ else if (ArgName == "FunctionArgument")
+ Ptr = new SimpleArgument(Arg, Attr, "FunctionDecl *");
+ else if (ArgName == "IdentifierArgument")
+ Ptr = new SimpleArgument(Arg, Attr, "IdentifierInfo *");
+ else if (ArgName == "IntArgument") Ptr = new SimpleArgument(Arg, Attr, "int");
+ else if (ArgName == "StringArgument") Ptr = new StringArgument(Arg, Attr);
+ else if (ArgName == "TypeArgument")
+ Ptr = new SimpleArgument(Arg, Attr, "QualType");
+ else if (ArgName == "UnsignedArgument")
+ Ptr = new SimpleArgument(Arg, Attr, "unsigned");
+ else if (ArgName == "VariadicUnsignedArgument")
+ Ptr = new VariadicArgument(Arg, Attr, "unsigned");
+
+ if (!Ptr) {
+ std::vector<Record*> Bases = Search->getSuperClasses();
+ for (std::vector<Record*>::iterator i = Bases.begin(), e = Bases.end();
+ i != e; ++i) {
+ Ptr = createArgument(Arg, Attr, *i);
+ if (Ptr)
+ break;
+ }
+ }
+ return Ptr;
+}
+
void ClangAttrClassEmitter::run(raw_ostream &OS) {
OS << "// This file is generated by TableGen. Do not edit.\n\n";
OS << "#ifndef LLVM_CLANG_ATTR_CLASSES_INC\n";
@@ -28,29 +462,63 @@ void ClangAttrClassEmitter::run(raw_ostream &OS) {
i != e; ++i) {
Record &R = **i;
- if (R.getValueAsBit("DoNotEmit"))
- continue;
-
OS << "class " << R.getName() << "Attr : public Attr {\n";
- std::vector<Record*> Args = R.getValueAsListOfDefs("Args");
+ std::vector<Record*> ArgRecords = R.getValueAsListOfDefs("Args");
+ std::vector<Argument*> Args;
+ std::vector<Argument*>::iterator ai, ae;
+ Args.reserve(ArgRecords.size());
+
+ for (std::vector<Record*>::iterator ri = ArgRecords.begin(),
+ re = ArgRecords.end();
+ ri != re; ++ri) {
+ Record &ArgRecord = **ri;
+ Argument *Arg = createArgument(ArgRecord, R.getName());
+ assert(Arg);
+ Args.push_back(Arg);
+
+ Arg->writeDeclarations(OS);
+ OS << "\n\n";
+ }
- // FIXME: Handle arguments
- assert(Args.empty() && "Can't yet handle arguments");
+ ae = Args.end();
OS << "\n public:\n";
- OS << " " << R.getName() << "Attr(";
+ OS << " " << R.getName() << "Attr(SourceLocation L, ASTContext &Ctx\n";
- // Arguments go here
+ for (ai = Args.begin(); ai != ae; ++ai) {
+ OS << " , ";
+ (*ai)->writeCtorParameters(OS);
+ OS << "\n";
+ }
- OS << ")\n";
- OS << " : Attr(attr::" << R.getName() << ")";
+ OS << " )\n";
+ OS << " : Attr(attr::" << R.getName() << ", L)\n";
- // Arguments go here
-
- OS << " {}\n\n";
+ for (ai = Args.begin(); ai != ae; ++ai) {
+ OS << " , ";
+ (*ai)->writeCtorInitializers(OS);
+ OS << "\n";
+ }
+
+ OS << " {\n";
+
+ for (ai = Args.begin(); ai != ae; ++ai) {
+ (*ai)->writeCtorBody(OS);
+ OS << "\n";
+ }
+ OS << " }\n\n";
+
+ OS << " virtual " << R.getName() << "Attr *clone (ASTContext &C) const;\n";
+
+ for (ai = Args.begin(); ai != ae; ++ai) {
+ (*ai)->writeAccessors(OS);
+ OS << "\n\n";
+ }
+
+ OS << R.getValueAsCode("AdditionalMembers");
+ OS << "\n\n";
- OS << " virtual Attr *clone (ASTContext &C) const;\n";
OS << " static bool classof(const Attr *A) { return A->getKind() == "
<< "attr::" << R.getName() << "; }\n";
OS << " static bool classof(const " << R.getName()
@@ -61,6 +529,34 @@ void ClangAttrClassEmitter::run(raw_ostream &OS) {
OS << "#endif\n";
}
+void ClangAttrImplEmitter::run(raw_ostream &OS) {
+ OS << "// This file is generated by TableGen. Do not edit.\n\n";
+
+ std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr");
+ std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end(), ri, re;
+ std::vector<Argument*>::iterator ai, ae;
+
+ for (; i != e; ++i) {
+ Record &R = **i;
+ std::vector<Record*> ArgRecords = R.getValueAsListOfDefs("Args");
+ std::vector<Argument*> Args;
+ for (ri = ArgRecords.begin(), re = ArgRecords.end(); ri != re; ++ri)
+ Args.push_back(createArgument(**ri, R.getName()));
+
+ for (ai = Args.begin(), ae = Args.end(); ai != ae; ++ai)
+ (*ai)->writeAccessorDefinitions(OS);
+
+ OS << R.getName() << "Attr *" << R.getName()
+ << "Attr::clone(ASTContext &C) const {\n";
+ OS << " return new (C) " << R.getName() << "Attr(getLocation(), C";
+ for (ai = Args.begin(); ai != ae; ++ai) {
+ OS << ", ";
+ (*ai)->writeCloneArgs(OS);
+ }
+ OS << ");\n}\n\n";
+ }
+}
+
void ClangAttrListEmitter::run(raw_ostream &OS) {
OS << "// This file is generated by TableGen. Do not edit.\n\n";
@@ -82,3 +578,61 @@ void ClangAttrListEmitter::run(raw_ostream &OS) {
OS << "#undef LAST_ATTR\n";
OS << "#undef ATTR\n";
}
+
+void ClangAttrPCHReadEmitter::run(raw_ostream &OS) {
+ OS << "// This file is generated by TableGen. Do not edi.\n\n";
+
+ std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"),
+ ArgRecords;
+ std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end(), ai, ae;
+ std::vector<Argument*> Args;
+ std::vector<Argument*>::iterator ri, re;
+
+ OS << " switch (Kind) {\n";
+ OS << " default:\n";
+ OS << " assert(0 && \"Unknown attribute!\");\n";
+ OS << " break;\n";
+ for (; i != e; ++i) {
+ Record &R = **i;
+ OS << " case attr::" << R.getName() << ": {\n";
+ ArgRecords = R.getValueAsListOfDefs("Args");
+ Args.clear();
+ for (ai = ArgRecords.begin(), ae = ArgRecords.end(); ai != ae; ++ai) {
+ Argument *A = createArgument(**ai, R.getName());
+ Args.push_back(A);
+ A->writePCHReadDecls(OS);
+ }
+ OS << " New = new (*Context) " << R.getName() << "Attr(Loc, *Context";
+ for (ri = Args.begin(), re = Args.end(); ri != re; ++ri) {
+ OS << ", ";
+ (*ri)->writePCHReadArgs(OS);
+ }
+ OS << ");\n";
+ OS << " break;\n";
+ OS << " }\n";
+ }
+ OS << " }\n";
+}
+
+void ClangAttrPCHWriteEmitter::run(raw_ostream &OS) {
+ std::vector<Record*> Attrs = Records.getAllDerivedDefinitions("Attr"), Args;
+ std::vector<Record*>::iterator i = Attrs.begin(), e = Attrs.end(), ai, ae;
+
+ OS << " switch (A->getKind()) {\n";
+ OS << " default:\n";
+ OS << " llvm_unreachable(\"Unknown attribute kind!\");\n";
+ OS << " break;\n";
+ for (; i != e; ++i) {
+ Record &R = **i;
+ OS << " case attr::" << R.getName() << ": {\n";
+ Args = R.getValueAsListOfDefs("Args");
+ if (!Args.empty())
+ OS << " const " << R.getName() << "Attr *SA = cast<" << R.getName()
+ << "Attr>(A);\n";
+ for (ai = Args.begin(), ae = Args.end(); ai != ae; ++ai)
+ createArgument(**ai, R.getName())->writePCHWrite(OS);
+ OS << " break;\n";
+ OS << " }\n";
+ }
+ OS << " }\n";
+}
diff --git a/utils/TableGen/ClangAttrEmitter.h b/utils/TableGen/ClangAttrEmitter.h
index 5ce1c879efeb..83149824b2e7 100644
--- a/utils/TableGen/ClangAttrEmitter.h
+++ b/utils/TableGen/ClangAttrEmitter.h
@@ -31,6 +31,19 @@ class ClangAttrClassEmitter : public TableGenBackend {
void run(raw_ostream &OS);
};
+/// ClangAttrImplEmitter - class emits the class method defintions for
+/// attributes for clang.
+class ClangAttrImplEmitter : public TableGenBackend {
+ RecordKeeper &Records;
+
+ public:
+ explicit ClangAttrImplEmitter(RecordKeeper &R)
+ : Records(R)
+ {}
+
+ void run(raw_ostream &OS);
+};
+
/// ClangAttrListEmitter - class emits the enumeration list for attributes for
/// clang.
class ClangAttrListEmitter : public TableGenBackend {
@@ -44,6 +57,32 @@ class ClangAttrListEmitter : public TableGenBackend {
void run(raw_ostream &OS);
};
+/// ClangAttrPCHReadEmitter - class emits the code to read an attribute from
+/// a clang precompiled header.
+class ClangAttrPCHReadEmitter : public TableGenBackend {
+ RecordKeeper &Records;
+
+public:
+ explicit ClangAttrPCHReadEmitter(RecordKeeper &R)
+ : Records(R)
+ {}
+
+ void run(raw_ostream &OS);
+};
+
+/// ClangAttrPCHWriteEmitter - class emits the code to read an attribute from
+/// a clang precompiled header.
+class ClangAttrPCHWriteEmitter : public TableGenBackend {
+ RecordKeeper &Records;
+
+public:
+ explicit ClangAttrPCHWriteEmitter(RecordKeeper &R)
+ : Records(R)
+ {}
+
+ void run(raw_ostream &OS);
+};
+
}
#endif
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 878ed09550aa..303aa6c450c2 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2197,10 +2197,10 @@ private:
if (IntInfo->ModRef >= CodeGenIntrinsic::ReadArgMem)
mayLoad = true;// These may load memory.
- if (IntInfo->ModRef >= CodeGenIntrinsic::WriteArgMem)
+ if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteArgMem)
mayStore = true;// Intrinsics that can write to memory are 'mayStore'.
- if (IntInfo->ModRef >= CodeGenIntrinsic::WriteMem)
+ if (IntInfo->ModRef >= CodeGenIntrinsic::ReadWriteMem)
// WriteMem intrinsics can have other strange effects.
HasSideEffects = true;
}
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 35b54a542717..01a1fe11f531 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -102,6 +102,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R, const std::string &AsmStr)
isReturn = R->getValueAsBit("isReturn");
isBranch = R->getValueAsBit("isBranch");
isIndirectBranch = R->getValueAsBit("isIndirectBranch");
+ isCompare = R->getValueAsBit("isCompare");
isBarrier = R->getValueAsBit("isBarrier");
isCall = R->getValueAsBit("isCall");
canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 946c2d01a52f..b02d0d38f975 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -123,6 +123,7 @@ namespace llvm {
bool isReturn;
bool isBranch;
bool isIndirectBranch;
+ bool isCompare;
bool isBarrier;
bool isCall;
bool canFoldAsLoad;
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 7e7bdf989acf..3208c0d628d9 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -60,7 +60,7 @@ namespace llvm {
// Memory mod/ref behavior of this intrinsic.
enum {
- NoMem, ReadArgMem, ReadMem, WriteArgMem, WriteMem
+ NoMem, ReadArgMem, ReadMem, ReadWriteArgMem, ReadWriteMem
} ModRef;
/// This is set to true if the intrinsic is overloaded by its argument
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 344f77f1fed6..ccd3d222bbad 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include <string>
#include <vector>
+#include <set>
#include <cstdlib>
namespace llvm {
@@ -55,6 +56,37 @@ namespace llvm {
assert(0 && "VTNum greater than number of ValueTypes in RegClass!");
abort();
}
+
+ // Returns true if RC is a strict subclass.
+ // RC is a sub-class of this class if it is a valid replacement for any
+ // instruction operand where a register of this classis required. It must
+ // satisfy these conditions:
+ //
+ // 1. All RC registers are also in this.
+ // 2. The RC spill size must not be smaller than our spill size.
+ // 3. RC spill alignment must be compatible with ours.
+ //
+ bool hasSubClass(const CodeGenRegisterClass *RC) const {
+
+ if (RC->Elements.size() > Elements.size() ||
+ (SpillAlignment && RC->SpillAlignment % SpillAlignment) ||
+ SpillSize > RC->SpillSize)
+ return false;
+
+ std::set<Record*> RegSet;
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ Record *Reg = Elements[i];
+ RegSet.insert(Reg);
+ }
+
+ for (unsigned i = 0, e = RC->Elements.size(); i != e; ++i) {
+ Record *Reg = RC->Elements[i];
+ if (!RegSet.count(Reg))
+ return false;
+ }
+
+ return true;
+ }
CodeGenRegisterClass(Record *R);
};
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index d8130fb36dde..cbfe2addbf2b 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -333,7 +333,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
const char *const FixedInstrs[] = {
"PHI",
"INLINEASM",
- "DBG_LABEL",
+ "PROLOG_LABEL",
"EH_LABEL",
"GC_LABEL",
"KILL",
@@ -434,7 +434,7 @@ std::vector<CodeGenIntrinsic> llvm::LoadIntrinsics(const RecordKeeper &RC,
CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
TheDef = R;
std::string DefName = R->getName();
- ModRef = WriteMem;
+ ModRef = ReadWriteMem;
isOverloaded = false;
isCommutative = false;
@@ -555,10 +555,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
ModRef = ReadArgMem;
else if (Property->getName() == "IntrReadMem")
ModRef = ReadMem;
- else if (Property->getName() == "IntrWriteArgMem")
- ModRef = WriteArgMem;
- else if (Property->getName() == "IntrWriteMem")
- ModRef = WriteMem;
+ else if (Property->getName() == "IntrReadWriteArgMem")
+ ModRef = ReadWriteArgMem;
else if (Property->getName() == "Commutative")
isCommutative = true;
else if (Property->isSubClassOf("NoCapture")) {
diff --git a/utils/TableGen/DAGISelEmitter.cpp b/utils/TableGen/DAGISelEmitter.cpp
index 04c7710ac503..8a73404dad95 100644
--- a/utils/TableGen/DAGISelEmitter.cpp
+++ b/utils/TableGen/DAGISelEmitter.cpp
@@ -57,51 +57,6 @@ static unsigned getResultPatternSize(TreePatternNode *P,
return Cost;
}
-//===----------------------------------------------------------------------===//
-// Predicate emitter implementation.
-//
-
-void DAGISelEmitter::EmitPredicateFunctions(raw_ostream &OS) {
- OS << "\n// Predicate functions.\n";
-
- // Walk the pattern fragments, adding them to a map, which sorts them by
- // name.
- typedef std::map<std::string, std::pair<Record*, TreePattern*> > PFsByNameTy;
- PFsByNameTy PFsByName;
-
- for (CodeGenDAGPatterns::pf_iterator I = CGP.pf_begin(), E = CGP.pf_end();
- I != E; ++I)
- PFsByName.insert(std::make_pair(I->first->getName(), *I));
-
-
- for (PFsByNameTy::iterator I = PFsByName.begin(), E = PFsByName.end();
- I != E; ++I) {
- Record *PatFragRecord = I->second.first;// Record that derives from PatFrag.
- TreePattern *P = I->second.second;
-
- // If there is a code init for this fragment, emit the predicate code.
- std::string Code = PatFragRecord->getValueAsCode("Predicate");
- if (Code.empty()) continue;
-
- if (P->getOnlyTree()->isLeaf())
- OS << "inline bool Predicate_" << PatFragRecord->getName()
- << "(SDNode *N) const {\n";
- else {
- std::string ClassName =
- CGP.getSDNodeInfo(P->getOnlyTree()->getOperator()).getSDClassName();
- const char *C2 = ClassName == "SDNode" ? "N" : "inN";
-
- OS << "inline bool Predicate_" << PatFragRecord->getName()
- << "(SDNode *" << C2 << ") const {\n";
- if (ClassName != "SDNode")
- OS << " " << ClassName << " *N = cast<" << ClassName << ">(inN);\n";
- }
- OS << Code << "\n}\n";
- }
-
- OS << "\n\n";
-}
-
namespace {
// PatternSortingPredicate - return true if we prefer to match LHS before RHS.
// In particular, we want to match maximal patterns first and lowest cost within
@@ -168,9 +123,6 @@ void DAGISelEmitter::run(raw_ostream &OS) {
errs() << "\n";
});
- // FIXME: These are being used by hand written code, gross.
- EmitPredicateFunctions(OS);
-
// Add all the patterns to a temporary list so we can sort them.
std::vector<const PatternToMatch*> Patterns;
for (CodeGenDAGPatterns::ptm_iterator I = CGP.ptm_begin(), E = CGP.ptm_end();
diff --git a/utils/TableGen/DAGISelEmitter.h b/utils/TableGen/DAGISelEmitter.h
index 5ffdde8eadda..2117e65455ac 100644
--- a/utils/TableGen/DAGISelEmitter.h
+++ b/utils/TableGen/DAGISelEmitter.h
@@ -31,8 +31,6 @@ public:
// run - Output the isel, returning true on failure.
void run(raw_ostream &OS);
-private:
- void EmitPredicateFunctions(raw_ostream &OS);
};
} // End llvm namespace
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 3750bd8a3349..dfbfe80c0a1d 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains code to generate C++ code a matcher.
+// This file contains code to generate C++ code for a matcher.
//
//===----------------------------------------------------------------------===//
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index eb528eb02be8..aba6636a1370 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -689,8 +689,8 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
!CGP.getDefaultOperand(OperandNode).DefaultOps.empty()) {
// This is a predicate or optional def operand; emit the
// 'default ops' operands.
- const DAGDefaultOperand &DefaultOp =
- CGP.getDefaultOperand(II.OperandList[InstOpNo].Rec);
+ const DAGDefaultOperand &DefaultOp
+ = CGP.getDefaultOperand(OperandNode);
for (unsigned i = 0, e = DefaultOp.DefaultOps.size(); i != e; ++i)
EmitResultOperand(DefaultOp.DefaultOps[i], InstOps);
continue;
@@ -908,6 +908,3 @@ Matcher *llvm::ConvertPatternToMatcher(const PatternToMatch &Pattern,
// Unconditional match.
return Gen.GetMatcher();
}
-
-
-
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index c5ee82850d18..525fffb0ee2c 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -84,34 +84,6 @@ namespace {
}
};
- class StructEmitter {
- private:
- std::string Name;
- typedef std::pair<const char*, const char*> member;
- std::vector< member > Members;
- public:
- StructEmitter(const char *N) : Name(N) {
- }
- void addMember(const char *t, const char *n) {
- member m(t, n);
- Members.push_back(m);
- }
- void emit(raw_ostream &o, unsigned int &i) {
- o.indent(i) << "struct " << Name.c_str() << " {" << "\n";
- i += 2;
-
- unsigned int index = 0;
- unsigned int numMembers = Members.size();
- for (index = 0; index < numMembers; ++index) {
- o.indent(i) << Members[index].first << " ";
- o.indent(i) << Members[index].second << ";" << "\n";
- }
-
- i -= 2;
- o.indent(i) << "};" << "\n";
- }
- };
-
class ConstantEmitter {
public:
virtual ~ConstantEmitter() { }
@@ -126,10 +98,6 @@ namespace {
const char* String;
};
public:
- LiteralConstantEmitter(const char *string) :
- IsNumber(false),
- String(string) {
- }
LiteralConstantEmitter(int number = 0) :
IsNumber(true),
Number(number) {
@@ -139,11 +107,6 @@ namespace {
Number = 0;
String = string;
}
- void set(int number) {
- IsNumber = true;
- String = NULL;
- Number = number;
- }
bool is(const char *string) {
return !strcmp(String, string);
}
@@ -339,6 +302,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
MEM("f80mem");
MEM("opaque80mem");
MEM("i128mem");
+ MEM("i256mem");
MEM("f128mem");
MEM("f256mem");
MEM("opaque512mem");
@@ -577,6 +541,7 @@ static void X86ExtractSemantics(
static int ARMFlagFromOpName(LiteralConstantEmitter *type,
const std::string &name) {
REG("GPR");
+ REG("rGPR");
REG("tcGPR");
REG("cc_out");
REG("s_cc_out");
@@ -597,6 +562,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
IMM("cps_opt");
IMM("vfp_f64imm");
IMM("vfp_f32imm");
+ IMM("memb_opt");
IMM("msr_mask");
IMM("neg_zero");
IMM("imm0_31");
@@ -605,6 +571,7 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
IMM("jt2block_operand");
IMM("t_imm_s4");
IMM("pclabel");
+ IMM("shift_imm");
MISC("brtarget", "kOperandTypeARMBranchTarget"); // ?
MISC("so_reg", "kOperandTypeARMSoReg"); // R, R, I
@@ -895,21 +862,3 @@ void EDEmitter::run(raw_ostream &o) {
o << "}\n";
}
-
-void EDEmitter::runHeader(raw_ostream &o) {
- EmitSourceFileHeader("Enhanced Disassembly Info Header", o);
-
- o << "#ifndef EDInfo_" << "\n";
- o << "#define EDInfo_" << "\n";
- o << "\n";
- o << "#define EDIS_MAX_OPERANDS " << format("%d", EDIS_MAX_OPERANDS) << "\n";
- o << "#define EDIS_MAX_SYNTAXES " << format("%d", EDIS_MAX_SYNTAXES) << "\n";
- o << "\n";
-
- unsigned int i = 0;
-
- emitCommonEnums(o, i);
-
- o << "\n";
- o << "#endif" << "\n";
-}
diff --git a/utils/TableGen/EDEmitter.h b/utils/TableGen/EDEmitter.h
index 9e40a8b1ca4e..e30373fed2eb 100644
--- a/utils/TableGen/EDEmitter.h
+++ b/utils/TableGen/EDEmitter.h
@@ -27,9 +27,6 @@ namespace llvm {
// run - Output the instruction table.
void run(raw_ostream &o);
-
- // runHeader - Emit a header file that allows use of the instruction table.
- void runHeader(raw_ostream &o);
};
} // End llvm namespace
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 08fc139b5d10..6c16fcfaa8a2 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -54,6 +54,7 @@ struct OperandsSignature {
bool initialize(TreePatternNode *InstPatNode,
const CodeGenTarget &Target,
MVT::SimpleValueType VT) {
+
if (!InstPatNode->isLeaf()) {
if (InstPatNode->getOperator()->getName() == "imm") {
Operands.push_back("i");
@@ -69,6 +70,7 @@ struct OperandsSignature {
for (unsigned i = 0, e = InstPatNode->getNumChildren(); i != e; ++i) {
TreePatternNode *Op = InstPatNode->getChild(i);
+
// For now, filter out any operand with a predicate.
// For now, filter out any operand with multiple values.
if (!Op->getPredicateFns().empty() ||
@@ -105,13 +107,15 @@ struct OperandsSignature {
RC = Target.getRegisterClassForRegister(OpLeafRec);
else
return false;
- // For now, require the register operands' register classes to all
- // be the same.
+
+ // For now, this needs to be a register class of some sort.
if (!RC)
return false;
- // For now, all the operands must have the same register class.
+
+ // For now, all the operands must have the same register class or be
+ // a strict subclass of the destination.
if (DstRC) {
- if (DstRC != RC)
+ if (DstRC != RC && !DstRC->hasSubClass(RC))
return false;
} else
DstRC = RC;
@@ -208,7 +212,8 @@ class FastISelMap {
typedef std::map<MVT::SimpleValueType, PredMap> RetPredMap;
typedef std::map<MVT::SimpleValueType, RetPredMap> TypeRetPredMap;
typedef std::map<std::string, TypeRetPredMap> OpcodeTypeRetPredMap;
- typedef std::map<OperandsSignature, OpcodeTypeRetPredMap> OperandsOpcodeTypeRetPredMap;
+ typedef std::map<OperandsSignature, OpcodeTypeRetPredMap>
+ OperandsOpcodeTypeRetPredMap;
OperandsOpcodeTypeRetPredMap SimplePatterns;
@@ -260,7 +265,7 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(Op);
if (II.OperandList.empty())
continue;
-
+
// For now, ignore multi-instruction patterns.
bool MultiInsts = false;
for (unsigned i = 0, e = Dst->getNumChildren(); i != e; ++i) {
@@ -287,6 +292,10 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
if (!DstRC)
continue;
} else {
+ // If this isn't a leaf, then continue since the register classes are
+ // a bit too complicated for now.
+ if (!Dst->getChild(1)->isLeaf()) continue;
+
DefInit *SR = dynamic_cast<DefInit*>(Dst->getChild(1)->getLeafValue());
if (SR)
SubRegNo = getQualifiedName(SR->getDef());
@@ -371,7 +380,8 @@ void FastISelMap::CollectPatterns(CodeGenDAGPatterns &CGP) {
SubRegNo,
PhysRegInputs
};
- assert(!SimplePatterns[Operands][OpcodeName][VT][RetVT].count(PredicateCheck) &&
+ assert(!SimplePatterns[Operands][OpcodeName][VT][RetVT]
+ .count(PredicateCheck) &&
"Duplicate pattern!");
SimplePatterns[Operands][OpcodeName][VT][RetVT][PredicateCheck] = Memo;
}
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index f28af1589d65..4d3aa5e621c9 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -270,6 +270,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isReturn) OS << "|(1<<TID::Return)";
if (Inst.isBranch) OS << "|(1<<TID::Branch)";
if (Inst.isIndirectBranch) OS << "|(1<<TID::IndirectBranch)";
+ if (Inst.isCompare) OS << "|(1<<TID::Compare)";
if (Inst.isBarrier) OS << "|(1<<TID::Barrier)";
if (Inst.hasDelaySlot) OS << "|(1<<TID::DelaySlot)";
if (Inst.isCall) OS << "|(1<<TID::Call)";
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index d7a90513df33..ba30d97eaa35 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -545,7 +545,7 @@ EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
OS << "switch (iid) {\n";
OS << "default:\n return UnknownModRefBehavior;\n";
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
- if (Ints[i].ModRef == CodeGenIntrinsic::WriteMem)
+ if (Ints[i].ModRef == CodeGenIntrinsic::ReadWriteMem)
continue;
OS << "case " << TargetPrefix << "Intrinsic::" << Ints[i].EnumName
<< ":\n";
@@ -559,7 +559,7 @@ EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
case CodeGenIntrinsic::ReadMem:
OS << " return OnlyReadsMemory;\n";
break;
- case CodeGenIntrinsic::WriteArgMem:
+ case CodeGenIntrinsic::ReadWriteArgMem:
OS << " return AccessesArguments;\n";
break;
}
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index da2d54f5439b..8b81e14cc26a 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -33,6 +33,7 @@ namespace {
/// Typedefs
typedef std::vector<Record*> RecordVector;
+typedef std::vector<const DagInit*> DagVector;
typedef std::vector<std::string> StrVector;
//===----------------------------------------------------------------------===//
@@ -49,7 +50,7 @@ const unsigned Indent4 = TabWidth*4;
const char * const DefaultHelpString = "NO HELP MESSAGE PROVIDED";
// Name for the "sink" option.
-const char * const SinkOptionName = "AutoGeneratedSinkOption";
+const char * const SinkOptionName = "SinkOption";
//===----------------------------------------------------------------------===//
/// Helper functions
@@ -109,11 +110,6 @@ void CheckNumberOfArguments (const DagInit& d, unsigned minArgs) {
throw GetOperatorName(d) + ": too few arguments!";
}
-// IsDagEmpty - is this DAG marked with an empty marker?
-bool IsDagEmpty (const DagInit& d) {
- return GetOperatorName(d) == "empty_dag_marker";
-}
-
// EscapeVariableName - Escape commas and other symbols not allowed
// in the C++ variable names. Makes it possible to use options named
// like "Wa," (useful for prefix options).
@@ -188,21 +184,25 @@ void apply(F Fun, T0& Arg0, T1& Arg1) {
/// documentation for detailed description of differences.
namespace OptionType {
- enum OptionType { Alias, Switch, Parameter, ParameterList,
- Prefix, PrefixList};
+ enum OptionType { Alias, Switch, SwitchList,
+ Parameter, ParameterList, Prefix, PrefixList };
bool IsAlias(OptionType t) {
return (t == Alias);
}
bool IsList (OptionType t) {
- return (t == ParameterList || t == PrefixList);
+ return (t == SwitchList || t == ParameterList || t == PrefixList);
}
bool IsSwitch (OptionType t) {
return (t == Switch);
}
+ bool IsSwitchList (OptionType t) {
+ return (t == SwitchList);
+ }
+
bool IsParameter (OptionType t) {
return (t == Parameter || t == Prefix);
}
@@ -214,6 +214,8 @@ OptionType::OptionType stringToOptionType(const std::string& T) {
return OptionType::Alias;
else if (T == "switch_option")
return OptionType::Switch;
+ else if (T == "switch_list_option")
+ return OptionType::SwitchList;
else if (T == "parameter_option")
return OptionType::Parameter;
else if (T == "parameter_list_option")
@@ -228,10 +230,9 @@ OptionType::OptionType stringToOptionType(const std::string& T) {
namespace OptionDescriptionFlags {
enum OptionDescriptionFlags { Required = 0x1, Hidden = 0x2,
- ReallyHidden = 0x4, Extern = 0x8,
- OneOrMore = 0x10, Optional = 0x20,
- CommaSeparated = 0x40, ForwardNotSplit = 0x80,
- ZeroOrMore = 0x100 };
+ ReallyHidden = 0x4, OneOrMore = 0x8,
+ Optional = 0x10, CommaSeparated = 0x20,
+ ForwardNotSplit = 0x40, ZeroOrMore = 0x80 };
}
/// OptionDescription - Represents data contained in a single
@@ -256,7 +257,13 @@ struct OptionDescription {
/// GenVariableName - Returns the variable name used in the
/// generated C++ code.
- std::string GenVariableName() const;
+ std::string GenVariableName() const
+ { return "autogenerated::" + GenOptionType() + EscapeVariableName(Name); }
+
+ /// GenPlainVariableName - Returns the variable name without the namespace
+ /// prefix.
+ std::string GenPlainVariableName() const
+ { return GenOptionType() + EscapeVariableName(Name); }
/// Merge - Merge two option descriptions.
void Merge (const OptionDescription& other);
@@ -273,9 +280,6 @@ struct OptionDescription {
bool isCommaSeparated() const;
void setCommaSeparated();
- bool isExtern() const;
- void setExtern();
-
bool isForwardNotSplit() const;
void setForwardNotSplit();
@@ -300,12 +304,23 @@ struct OptionDescription {
bool isSwitch() const
{ return OptionType::IsSwitch(this->Type); }
+ bool isSwitchList() const
+ { return OptionType::IsSwitchList(this->Type); }
+
bool isParameter() const
{ return OptionType::IsParameter(this->Type); }
bool isList() const
{ return OptionType::IsList(this->Type); }
+ bool isParameterList() const
+ { return (OptionType::IsList(this->Type)
+ && !OptionType::IsSwitchList(this->Type)); }
+
+private:
+
+ // GenOptionType - Helper function used by GenVariableName().
+ std::string GenOptionType() const;
};
void OptionDescription::CheckConsistency() const {
@@ -359,13 +374,6 @@ void OptionDescription::setForwardNotSplit() {
Flags |= OptionDescriptionFlags::ForwardNotSplit;
}
-bool OptionDescription::isExtern() const {
- return Flags & OptionDescriptionFlags::Extern;
-}
-void OptionDescription::setExtern() {
- Flags |= OptionDescriptionFlags::Extern;
-}
-
bool OptionDescription::isRequired() const {
return Flags & OptionDescriptionFlags::Required;
}
@@ -417,6 +425,8 @@ const char* OptionDescription::GenTypeDeclaration() const {
return "cl::list<std::string>";
case OptionType::Switch:
return "cl::opt<bool>";
+ case OptionType::SwitchList:
+ return "cl::list<bool>";
case OptionType::Parameter:
case OptionType::Prefix:
default:
@@ -424,20 +434,21 @@ const char* OptionDescription::GenTypeDeclaration() const {
}
}
-std::string OptionDescription::GenVariableName() const {
- const std::string& EscapedName = EscapeVariableName(Name);
+std::string OptionDescription::GenOptionType() const {
switch (Type) {
case OptionType::Alias:
- return "AutoGeneratedAlias_" + EscapedName;
+ return "Alias_";
case OptionType::PrefixList:
case OptionType::ParameterList:
- return "AutoGeneratedList_" + EscapedName;
+ return "List_";
case OptionType::Switch:
- return "AutoGeneratedSwitch_" + EscapedName;
+ return "Switch_";
+ case OptionType::SwitchList:
+ return "SwitchList_";
case OptionType::Prefix:
case OptionType::Parameter:
default:
- return "AutoGeneratedParameter_" + EscapedName;
+ return "Parameter_";
}
}
@@ -457,9 +468,11 @@ public:
// wrong type.
const OptionDescription& FindSwitch(const std::string& OptName) const;
const OptionDescription& FindParameter(const std::string& OptName) const;
- const OptionDescription& FindList(const std::string& OptName) const;
+ const OptionDescription& FindParameterList(const std::string& OptName) const;
const OptionDescription&
FindListOrParameter(const std::string& OptName) const;
+ const OptionDescription&
+ FindParameterListOrParameter(const std::string& OptName) const;
/// insertDescription - Insert new OptionDescription into
/// OptionDescriptions list
@@ -489,10 +502,10 @@ OptionDescriptions::FindSwitch(const std::string& OptName) const {
}
const OptionDescription&
-OptionDescriptions::FindList(const std::string& OptName) const {
+OptionDescriptions::FindParameterList(const std::string& OptName) const {
const OptionDescription& OptDesc = this->FindOption(OptName);
- if (!OptDesc.isList())
- throw OptName + ": incorrect option type - should be a list!";
+ if (!OptDesc.isList() || OptDesc.isSwitchList())
+ throw OptName + ": incorrect option type - should be a parameter list!";
return OptDesc;
}
@@ -513,6 +526,16 @@ OptionDescriptions::FindListOrParameter(const std::string& OptName) const {
return OptDesc;
}
+const OptionDescription&
+OptionDescriptions::FindParameterListOrParameter
+(const std::string& OptName) const {
+ const OptionDescription& OptDesc = this->FindOption(OptName);
+ if ((!OptDesc.isList() && !OptDesc.isParameter()) || OptDesc.isSwitchList())
+ throw OptName
+ + ": incorrect option type - should be a parameter list or parameter!";
+ return OptDesc;
+}
+
void OptionDescriptions::InsertDescription (const OptionDescription& o) {
container_type::iterator I = Descriptions.find(o.Name);
if (I != Descriptions.end()) {
@@ -586,7 +609,6 @@ void InvokeDagInitHandler(const FunctionObject* const Obj,
((Obj)->*(h))(Dag, IndentLevel, O);
}
-
template <typename H>
typename HandlerTable<H>::HandlerMap HandlerTable<H>::Handlers_;
@@ -615,7 +637,6 @@ public:
: optDesc_(OD)
{
if (!staticMembersInitialized_) {
- AddHandler("extern", &CollectOptionProperties::onExtern);
AddHandler("help", &CollectOptionProperties::onHelp);
AddHandler("hidden", &CollectOptionProperties::onHidden);
AddHandler("init", &CollectOptionProperties::onInit);
@@ -644,11 +665,6 @@ private:
/// Option property handlers --
/// Methods that handle option properties such as (help) or (hidden).
- void onExtern (const DagInit& d) {
- CheckNumberOfArguments(d, 0);
- optDesc_.setExtern();
- }
-
void onHelp (const DagInit& d) {
CheckNumberOfArguments(d, 1);
optDesc_.Help = EscapeQuotes(InitPtrToString(d.getArg(0)));
@@ -666,8 +682,8 @@ private:
void onCommaSeparated (const DagInit& d) {
CheckNumberOfArguments(d, 0);
- if (!optDesc_.isList())
- throw "'comma_separated' is valid only on list options!";
+ if (!optDesc_.isParameterList())
+ throw "'comma_separated' is valid only on parameter list options!";
optDesc_.setCommaSeparated();
}
@@ -709,7 +725,7 @@ private:
void onZeroOrMore (const DagInit& d) {
CheckNumberOfArguments(d, 0);
- if (OptionType::IsList(optDesc_.Type))
+ if (optDesc_.isList())
llvm::errs() << "Warning: specifying the 'zero_or_more' property "
"on a list option has no effect.\n";
@@ -720,7 +736,7 @@ private:
void onOptional (const DagInit& d) {
CheckNumberOfArguments(d, 0);
- if (!OptionType::IsList(optDesc_.Type))
+ if (!optDesc_.isList())
llvm::errs() << "Warning: specifying the 'optional' property"
"on a non-list option has no effect.\n";
@@ -734,7 +750,7 @@ private:
if (val < 2)
throw "Error in the 'multi_val' property: "
"the value must be greater than 1!";
- if (!OptionType::IsList(optDesc_.Type))
+ if (!optDesc_.isParameterList())
throw "The multi_val property is valid only on list options!";
optDesc_.MultiVal = val;
}
@@ -761,16 +777,16 @@ public:
OptionDescription OD(Type, Name);
- if (!OD.isExtern())
- CheckNumberOfArguments(d, 2);
+ CheckNumberOfArguments(d, 2);
if (OD.isAlias()) {
// Aliases store the aliased option name in the 'Help' field.
OD.Help = InitPtrToString(d.getArg(1));
}
- else if (!OD.isExtern()) {
+ else {
processOptionProperties(d, OD);
}
+
OptDescs_.InsertDescription(OD);
}
@@ -789,15 +805,14 @@ private:
/// CollectOptionDescriptions - Collects option properties from all
/// OptionLists.
-void CollectOptionDescriptions (RecordVector::const_iterator B,
- RecordVector::const_iterator E,
+void CollectOptionDescriptions (const RecordVector& V,
OptionDescriptions& OptDescs)
{
// For every OptionList:
- for (; B!=E; ++B) {
- RecordVector::value_type T = *B;
+ for (RecordVector::const_iterator B = V.begin(),
+ E = V.end(); B!=E; ++B) {
// Throws an exception if the value does not exist.
- ListInit* PropList = T->getValueAsListInit("options");
+ ListInit* PropList = (*B)->getValueAsListInit("options");
// For every option description in this list:
// collect the information and
@@ -831,11 +846,7 @@ struct ToolDescription : public RefCountedBase<ToolDescription> {
// Default ctor here is needed because StringMap can only store
// DefaultConstructible objects
- ToolDescription ()
- : CmdLine(0), Actions(0), OutFileOption("-o"),
- Flags(0), OnEmpty(0)
- {}
- ToolDescription (const std::string& n)
+ ToolDescription (const std::string &n = "")
: Name(n), CmdLine(0), Actions(0), OutFileOption("-o"),
Flags(0), OnEmpty(0)
{}
@@ -974,12 +985,12 @@ private:
/// CollectToolDescriptions - Gather information about tool properties
/// from the parsed TableGen data (basically a wrapper for the
/// CollectToolProperties function object).
-void CollectToolDescriptions (RecordVector::const_iterator B,
- RecordVector::const_iterator E,
+void CollectToolDescriptions (const RecordVector& Tools,
ToolDescriptions& ToolDescs)
{
// Iterate over a properties list of every Tool definition
- for (;B!=E;++B) {
+ for (RecordVector::const_iterator B = Tools.begin(),
+ E = Tools.end(); B!=E; ++B) {
const Record* T = *B;
// Throws an exception if the value does not exist.
ListInit* PropList = T->getValueAsListInit("properties");
@@ -995,30 +1006,17 @@ void CollectToolDescriptions (RecordVector::const_iterator B,
/// FillInEdgeVector - Merge all compilation graph definitions into
/// one single edge list.
-void FillInEdgeVector(RecordVector::const_iterator B,
- RecordVector::const_iterator E, RecordVector& Out) {
- for (; B != E; ++B) {
- const ListInit* edges = (*B)->getValueAsListInit("edges");
-
- for (unsigned i = 0; i < edges->size(); ++i)
- Out.push_back(edges->getElementAsRecord(i));
- }
-}
-
-/// CalculatePriority - Calculate the priority of this plugin.
-int CalculatePriority(RecordVector::const_iterator B,
- RecordVector::const_iterator E) {
- int priority = 0;
+void FillInEdgeVector(const RecordVector& CompilationGraphs,
+ DagVector& Out) {
+ for (RecordVector::const_iterator B = CompilationGraphs.begin(),
+ E = CompilationGraphs.end(); B != E; ++B) {
+ const ListInit* Edges = (*B)->getValueAsListInit("edges");
- if (B != E) {
- priority = static_cast<int>((*B)->getValueAsInt("priority"));
-
- if (++B != E)
- throw "More than one 'PluginPriority' instance found: "
- "most probably an error!";
+ for (ListInit::const_iterator B = Edges->begin(),
+ E = Edges->end(); B != E; ++B) {
+ Out.push_back(&InitPtrToDag(*B));
+ }
}
-
- return priority;
}
/// NotInGraph - Helper function object for FilterNotInGraph.
@@ -1038,18 +1036,18 @@ public:
/// FilterNotInGraph - Filter out from ToolDescs all Tools not
/// mentioned in the compilation graph definition.
-void FilterNotInGraph (const RecordVector& EdgeVector,
+void FilterNotInGraph (const DagVector& EdgeVector,
ToolDescriptions& ToolDescs) {
// List all tools mentioned in the graph.
llvm::StringSet<> ToolsInGraph;
- for (RecordVector::const_iterator B = EdgeVector.begin(),
+ for (DagVector::const_iterator B = EdgeVector.begin(),
E = EdgeVector.end(); B != E; ++B) {
- const Record* Edge = *B;
- const std::string& NodeA = Edge->getValueAsString("a");
- const std::string& NodeB = Edge->getValueAsString("b");
+ const DagInit* Edge = *B;
+ const std::string& NodeA = InitPtrToString(Edge->getArg(0));
+ const std::string& NodeB = InitPtrToString(Edge->getArg(1));
if (NodeA != "root")
ToolsInGraph.insert(NodeA);
@@ -1079,10 +1077,8 @@ void FillInToolToLang (const ToolDescriptions& ToolDescs,
}
/// TypecheckGraph - Check that names for output and input languages
-/// on all edges do match. This doesn't do much when the information
-/// about the whole graph is not available (i.e. when compiling most
-/// plugins).
-void TypecheckGraph (const RecordVector& EdgeVector,
+/// on all edges do match.
+void TypecheckGraph (const DagVector& EdgeVector,
const ToolDescriptions& ToolDescs) {
StringMap<StringSet<> > ToolToInLang;
StringMap<std::string> ToolToOutLang;
@@ -1091,11 +1087,11 @@ void TypecheckGraph (const RecordVector& EdgeVector,
StringMap<std::string>::iterator IAE = ToolToOutLang.end();
StringMap<StringSet<> >::iterator IBE = ToolToInLang.end();
- for (RecordVector::const_iterator B = EdgeVector.begin(),
+ for (DagVector::const_iterator B = EdgeVector.begin(),
E = EdgeVector.end(); B != E; ++B) {
- const Record* Edge = *B;
- const std::string& NodeA = Edge->getValueAsString("a");
- const std::string& NodeB = Edge->getValueAsString("b");
+ const DagInit* Edge = *B;
+ const std::string& NodeA = InitPtrToString(Edge->getArg(0));
+ const std::string& NodeB = InitPtrToString(Edge->getArg(1));
StringMap<std::string>::iterator IA = ToolToOutLang.find(NodeA);
StringMap<StringSet<> >::iterator IB = ToolToInLang.find(NodeB);
@@ -1234,10 +1230,15 @@ public:
}
};
+/// IsOptionalEdge - Validate that the 'optional_edge' has proper structure.
+bool IsOptionalEdge (const DagInit& Edg) {
+ return (GetOperatorName(Edg) == "optional_edge") && (Edg.getNumArgs() > 2);
+}
+
/// CheckForSuperfluousOptions - Check that there are no side
/// effect-free options (specified only in the OptionList). Otherwise,
/// output a warning.
-void CheckForSuperfluousOptions (const RecordVector& Edges,
+void CheckForSuperfluousOptions (const DagVector& EdgeVector,
const ToolDescriptions& ToolDescs,
const OptionDescriptions& OptDescs) {
llvm::StringSet<> nonSuperfluousOptions;
@@ -1255,13 +1256,13 @@ void CheckForSuperfluousOptions (const RecordVector& Edges,
// Add all options mentioned in the 'case' clauses of the
// OptionalEdges of the compilation graph to the set of
// non-superfluous options.
- for (RecordVector::const_iterator B = Edges.begin(), E = Edges.end();
- B != E; ++B) {
- const Record* Edge = *B;
- DagInit& Weight = *Edge->getValueAsDag("weight");
-
- if (!IsDagEmpty(Weight))
+ for (DagVector::const_iterator B = EdgeVector.begin(),
+ E = EdgeVector.end(); B != E; ++B) {
+ const DagInit& Edge = **B;
+ if (IsOptionalEdge(Edge)) {
+ const DagInit& Weight = InitPtrToDag(Edge.getArg(2));
WalkCase(&Weight, ExtractOptionNames(nonSuperfluousOptions), Id());
+ }
}
// Check that all options in OptDescs belong to the set of
@@ -1440,7 +1441,7 @@ bool EmitCaseTest2Args(const std::string& TestName,
return true;
}
else if (TestName == "element_in_list") {
- const OptionDescription& OptDesc = OptDescs.FindList(OptName);
+ const OptionDescription& OptDesc = OptDescs.FindParameterList(OptName);
const std::string& VarName = OptDesc.GenVariableName();
O << "std::find(" << VarName << ".begin(),\n";
O.indent(IndentLevel + Indent1)
@@ -1815,6 +1816,24 @@ void EmitCmdLineVecFill(const Init* CmdLine, const std::string& ToolName,
}
+/// EmitForEachListElementCycleHeader - Emit common code for iterating through
+/// all elements of a list. Helper function used by
+/// EmitForwardOptionPropertyHandlingCode.
+void EmitForEachListElementCycleHeader (const OptionDescription& D,
+ unsigned IndentLevel,
+ raw_ostream& O) {
+ unsigned IndentLevel1 = IndentLevel + Indent1;
+
+ O.indent(IndentLevel)
+ << "for (" << D.GenTypeDeclaration()
+ << "::iterator B = " << D.GenVariableName() << ".begin(),\n";
+ O.indent(IndentLevel)
+ << "E = " << D.GenVariableName() << ".end(); B != E;) {\n";
+ O.indent(IndentLevel1) << "unsigned pos = " << D.GenVariableName()
+ << ".getPosition(B - " << D.GenVariableName()
+ << ".begin());\n";
+}
+
/// EmitForwardOptionPropertyHandlingCode - Helper function used to
/// implement EmitActionHandler. Emits code for
/// handling the (forward) and (forward_as) option properties.
@@ -1855,14 +1874,7 @@ void EmitForwardOptionPropertyHandlingCode (const OptionDescription& D,
<< D.GenVariableName() << "));\n";
break;
case OptionType::PrefixList:
- O.indent(IndentLevel)
- << "for (" << D.GenTypeDeclaration()
- << "::iterator B = " << D.GenVariableName() << ".begin(),\n";
- O.indent(IndentLevel)
- << "E = " << D.GenVariableName() << ".end(); B != E;) {\n";
- O.indent(IndentLevel1) << "unsigned pos = " << D.GenVariableName()
- << ".getPosition(B - " << D.GenVariableName()
- << ".begin());\n";
+ EmitForEachListElementCycleHeader(D, IndentLevel, O);
O.indent(IndentLevel1) << "vec.push_back(std::make_pair(pos, \""
<< Name << "\" + " << "*B));\n";
O.indent(IndentLevel1) << "++B;\n";
@@ -1875,14 +1887,7 @@ void EmitForwardOptionPropertyHandlingCode (const OptionDescription& D,
O.indent(IndentLevel) << "}\n";
break;
case OptionType::ParameterList:
- O.indent(IndentLevel)
- << "for (" << D.GenTypeDeclaration() << "::iterator B = "
- << D.GenVariableName() << ".begin(),\n";
- O.indent(IndentLevel) << "E = " << D.GenVariableName()
- << ".end() ; B != E;) {\n";
- O.indent(IndentLevel1) << "unsigned pos = " << D.GenVariableName()
- << ".getPosition(B - " << D.GenVariableName()
- << ".begin());\n";
+ EmitForEachListElementCycleHeader(D, IndentLevel, O);
O.indent(IndentLevel1) << "vec.push_back(std::make_pair(pos, \""
<< Name << "\"));\n";
@@ -1893,6 +1898,13 @@ void EmitForwardOptionPropertyHandlingCode (const OptionDescription& D,
O.indent(IndentLevel) << "}\n";
break;
+ case OptionType::SwitchList:
+ EmitForEachListElementCycleHeader(D, IndentLevel, O);
+ O.indent(IndentLevel1) << "vec.push_back(std::make_pair(pos, \""
+ << Name << "\"));\n";
+ O.indent(IndentLevel1) << "++B;\n";
+ O.indent(IndentLevel) << "}\n";
+ break;
case OptionType::Alias:
default:
throw "Aliases are not allowed in tool option descriptions!";
@@ -1908,10 +1920,10 @@ struct ActionHandlingCallbackBase
unsigned IndentLevel, raw_ostream& O) const
{
O.indent(IndentLevel)
- << "throw std::runtime_error(\"" <<
- (d.getNumArgs() >= 1 ? InitPtrToString(d.getArg(0))
- : "Unknown error!")
+ << "PrintError(\""
+ << (d.getNumArgs() >= 1 ? InitPtrToString(d.getArg(0)) : "Unknown error!")
<< "\");\n";
+ O.indent(IndentLevel) << "return 1;\n";
}
void onWarningDag(const DagInit& d,
@@ -1926,7 +1938,6 @@ struct ActionHandlingCallbackBase
/// EmitActionHandlersCallback - Emit code that handles actions. Used by
/// EmitGenerateActionMethod() as an argument to EmitCaseConstructHandler().
-
class EmitActionHandlersCallback;
typedef void (EmitActionHandlersCallback::* EmitActionHandlersCallbackHandler)
@@ -1997,7 +2008,12 @@ class EmitActionHandlersCallback :
{
CheckNumberOfArguments(Dag, 1);
const std::string& Name = InitPtrToString(Dag.getArg(0));
- const OptionDescription& D = OptDescs.FindListOrParameter(Name);
+ const OptionDescription& D = OptDescs.FindParameterListOrParameter(Name);
+
+ if (D.isSwitchList()) {
+ throw std::runtime_error
+ ("forward_value is not allowed with switch_list");
+ }
if (D.isParameter()) {
O.indent(IndentLevel) << "vec.push_back(std::make_pair("
@@ -2005,8 +2021,9 @@ class EmitActionHandlersCallback :
<< D.GenVariableName() << "));\n";
}
else {
- O.indent(IndentLevel) << "for (cl::list<std::string>::iterator B = "
- << D.GenVariableName() << ".begin(), \n";
+ O.indent(IndentLevel) << "for (" << D.GenTypeDeclaration()
+ << "::iterator B = " << D.GenVariableName()
+ << ".begin(), \n";
O.indent(IndentLevel + Indent1) << " E = " << D.GenVariableName()
<< ".end(); B != E; ++B)\n";
O.indent(IndentLevel) << "{\n";
@@ -2026,7 +2043,7 @@ class EmitActionHandlersCallback :
CheckNumberOfArguments(Dag, 2);
const std::string& Name = InitPtrToString(Dag.getArg(0));
const std::string& Hook = InitPtrToString(Dag.getArg(1));
- const OptionDescription& D = OptDescs.FindListOrParameter(Name);
+ const OptionDescription& D = OptDescs.FindParameterListOrParameter(Name);
O.indent(IndentLevel) << "vec.push_back(std::make_pair("
<< D.GenVariableName() << ".getPosition("
@@ -2099,25 +2116,32 @@ class EmitActionHandlersCallback :
};
void EmitGenerateActionMethodHeader(const ToolDescription& D,
- bool IsJoin, raw_ostream& O)
+ bool IsJoin, bool Naked,
+ raw_ostream& O)
{
+ O.indent(Indent1) << "int GenerateAction(Action& Out,\n";
+
if (IsJoin)
- O.indent(Indent1) << "Action GenerateAction(const PathVector& inFiles,\n";
+ O.indent(Indent2) << "const PathVector& inFiles,\n";
else
- O.indent(Indent1) << "Action GenerateAction(const sys::Path& inFile,\n";
+ O.indent(Indent2) << "const sys::Path& inFile,\n";
- O.indent(Indent2) << "bool HasChildren,\n";
+ O.indent(Indent2) << "const bool HasChildren,\n";
O.indent(Indent2) << "const llvm::sys::Path& TempDir,\n";
O.indent(Indent2) << "const InputLanguagesSet& InLangs,\n";
O.indent(Indent2) << "const LanguageMap& LangMap) const\n";
O.indent(Indent1) << "{\n";
- O.indent(Indent2) << "std::string cmd;\n";
- O.indent(Indent2) << "std::string out_file;\n";
- O.indent(Indent2) << "std::vector<std::pair<unsigned, std::string> > vec;\n";
- O.indent(Indent2) << "bool stop_compilation = !HasChildren;\n";
- O.indent(Indent2) << "bool no_out_file = false;\n";
- O.indent(Indent2) << "const char* output_suffix = \""
- << D.OutputSuffix << "\";\n";
+
+ if (!Naked) {
+ O.indent(Indent2) << "std::string cmd;\n";
+ O.indent(Indent2) << "std::string out_file;\n";
+ O.indent(Indent2)
+ << "std::vector<std::pair<unsigned, std::string> > vec;\n";
+ O.indent(Indent2) << "bool stop_compilation = !HasChildren;\n";
+ O.indent(Indent2) << "bool no_out_file = false;\n";
+ O.indent(Indent2) << "std::string output_suffix(\""
+ << D.OutputSuffix << "\");\n";
+ }
}
// EmitGenerateActionMethod - Emit either a normal or a "join" version of the
@@ -2126,7 +2150,7 @@ void EmitGenerateActionMethod (const ToolDescription& D,
const OptionDescriptions& OptDescs,
bool IsJoin, raw_ostream& O) {
- EmitGenerateActionMethodHeader(D, IsJoin, O);
+ EmitGenerateActionMethodHeader(D, IsJoin, /* Naked = */ false, O);
if (!D.CmdLine)
throw "Tool " + D.Name + " has no cmd_line property!";
@@ -2173,25 +2197,29 @@ void EmitGenerateActionMethod (const ToolDescription& D,
O.indent(Indent3) << "out_file = this->OutFilename("
<< (IsJoin ? "sys::Path(),\n" : "inFile,\n");
- O.indent(Indent4) << "TempDir, stop_compilation, output_suffix).str();\n\n";
+ O.indent(Indent4) <<
+ "TempDir, stop_compilation, output_suffix.c_str()).str();\n\n";
O.indent(Indent3) << "vec.push_back(std::make_pair(65536, out_file));\n";
O.indent(Indent2) << "}\n\n";
// Handle the Sink property.
+ std::string SinkOption("autogenerated::");
+ SinkOption += SinkOptionName;
if (D.isSink()) {
- O.indent(Indent2) << "if (!" << SinkOptionName << ".empty()) {\n";
+ O.indent(Indent2) << "if (!" << SinkOption << ".empty()) {\n";
O.indent(Indent3) << "for (cl::list<std::string>::iterator B = "
- << SinkOptionName << ".begin(), E = " << SinkOptionName
+ << SinkOption << ".begin(), E = " << SinkOption
<< ".end(); B != E; ++B)\n";
- O.indent(Indent4) << "vec.push_back(std::make_pair(" << SinkOptionName
- << ".getPosition(B - " << SinkOptionName
+ O.indent(Indent4) << "vec.push_back(std::make_pair(" << SinkOption
+ << ".getPosition(B - " << SinkOption
<< ".begin()), *B));\n";
O.indent(Indent2) << "}\n";
}
- O.indent(Indent2) << "return Action(cmd, this->SortArgs(vec), "
+ O.indent(Indent2) << "Out.Construct(cmd, this->SortArgs(vec), "
<< "stop_compilation, out_file);\n";
+ O.indent(Indent2) << "return 0;\n";
O.indent(Indent1) << "}\n\n";
}
@@ -2201,14 +2229,11 @@ void EmitGenerateActionMethods (const ToolDescription& ToolDesc,
const OptionDescriptions& OptDescs,
raw_ostream& O) {
if (!ToolDesc.isJoin()) {
- O.indent(Indent1) << "Action GenerateAction(const PathVector& inFiles,\n";
- O.indent(Indent2) << "bool HasChildren,\n";
- O.indent(Indent2) << "const llvm::sys::Path& TempDir,\n";
- O.indent(Indent2) << "const InputLanguagesSet& InLangs,\n";
- O.indent(Indent2) << "const LanguageMap& LangMap) const\n";
- O.indent(Indent1) << "{\n";
- O.indent(Indent2) << "throw std::runtime_error(\"" << ToolDesc.Name
+ EmitGenerateActionMethodHeader(ToolDesc, /* IsJoin = */ true,
+ /* Naked = */ true, O);
+ O.indent(Indent2) << "PrintError(\"" << ToolDesc.Name
<< " is not a Join tool!\");\n";
+ O.indent(Indent2) << "return -1;\n";
O.indent(Indent1) << "}\n\n";
}
else {
@@ -2321,8 +2346,7 @@ void EmitToolClassDefinition (const ToolDescription& D,
/// EmitOptionDefinitions - Iterate over a list of option descriptions
/// and emit registration code.
void EmitOptionDefinitions (const OptionDescriptions& descs,
- bool HasSink, bool HasExterns,
- raw_ostream& O)
+ bool HasSink, raw_ostream& O)
{
std::vector<OptionDescription> Aliases;
@@ -2336,16 +2360,8 @@ void EmitOptionDefinitions (const OptionDescriptions& descs,
continue;
}
- if (val.isExtern())
- O << "extern ";
-
O << val.GenTypeDeclaration() << ' '
- << val.GenVariableName();
-
- if (val.isExtern()) {
- O << ";\n";
- continue;
- }
+ << val.GenPlainVariableName();
O << "(\"" << val.Name << "\"\n";
@@ -2396,7 +2412,7 @@ void EmitOptionDefinitions (const OptionDescriptions& descs,
const OptionDescription& val = *B;
O << val.GenTypeDeclaration() << ' '
- << val.GenVariableName()
+ << val.GenPlainVariableName()
<< "(\"" << val.Name << '\"';
const OptionDescription& D = descs.FindOption(val.Help);
@@ -2407,9 +2423,7 @@ void EmitOptionDefinitions (const OptionDescriptions& descs,
// Emit the sink option.
if (HasSink)
- O << (HasExterns ? "extern cl" : "cl")
- << "::list<std::string> " << SinkOptionName
- << (HasExterns ? ";\n" : "(cl::Sink);\n");
+ O << "cl::list<std::string> " << SinkOptionName << "(cl::Sink);\n";
O << '\n';
}
@@ -2492,8 +2506,15 @@ class EmitPreprocessOptionsCallback :
O.indent(IndentLevel) << OptDesc.GenVariableName() << ".clear();\n";
for (ListInit::const_iterator B = List.begin(), E = List.end();
B != E; ++B) {
- O.indent(IndentLevel) << OptDesc.GenVariableName() << ".push_back(\""
- << InitPtrToString(*B) << "\");\n";
+ const Init* CurElem = *B;
+ if (OptDesc.isSwitchList())
+ CheckBooleanConstant(CurElem);
+
+ O.indent(IndentLevel)
+ << OptDesc.GenVariableName() << ".push_back(\""
+ << (OptDesc.isSwitchList() ? CurElem->getAsString()
+ : InitPtrToString(CurElem))
+ << "\");\n";
}
}
else if (OptDesc.isSwitch()) {
@@ -2561,11 +2582,11 @@ public:
};
-/// EmitPreprocessOptions - Emit the PreprocessOptionsLocal() function.
+/// EmitPreprocessOptions - Emit the PreprocessOptions() function.
void EmitPreprocessOptions (const RecordKeeper& Records,
const OptionDescriptions& OptDecs, raw_ostream& O)
{
- O << "void PreprocessOptionsLocal() {\n";
+ O << "int PreprocessOptions () {\n";
const RecordVector& OptionPreprocessors =
Records.getAllDerivedDefinitions("OptionPreprocessor");
@@ -2578,58 +2599,101 @@ void EmitPreprocessOptions (const RecordKeeper& Records,
false, OptDecs, O);
}
+ O << '\n';
+ O.indent(Indent1) << "return 0;\n";
O << "}\n\n";
}
-/// EmitPopulateLanguageMap - Emit the PopulateLanguageMapLocal() function.
-void EmitPopulateLanguageMap (const RecordKeeper& Records, raw_ostream& O)
+class DoEmitPopulateLanguageMap;
+typedef void (DoEmitPopulateLanguageMap::* DoEmitPopulateLanguageMapHandler)
+(const DagInit& D);
+
+class DoEmitPopulateLanguageMap
+: public HandlerTable<DoEmitPopulateLanguageMapHandler>
{
- O << "void PopulateLanguageMapLocal(LanguageMap& langMap) {\n";
+private:
+ raw_ostream& O_;
- // Get the relevant field out of RecordKeeper
- const Record* LangMapRecord = Records.getDef("LanguageMap");
+public:
+
+ explicit DoEmitPopulateLanguageMap (raw_ostream& O) : O_(O) {
+ if (!staticMembersInitialized_) {
+ AddHandler("lang_to_suffixes",
+ &DoEmitPopulateLanguageMap::onLangToSuffixes);
+
+ staticMembersInitialized_ = true;
+ }
+ }
- // It is allowed for a plugin to have no language map.
- if (LangMapRecord) {
+ void operator() (Init* I) {
+ InvokeDagInitHandler(this, I);
+ }
- ListInit* LangsToSuffixesList = LangMapRecord->getValueAsListInit("map");
- if (!LangsToSuffixesList)
- throw "Error in the language map definition!";
+private:
- for (unsigned i = 0; i < LangsToSuffixesList->size(); ++i) {
- const Record* LangToSuffixes = LangsToSuffixesList->getElementAsRecord(i);
+ void onLangToSuffixes (const DagInit& d) {
+ CheckNumberOfArguments(d, 2);
- const std::string& Lang = LangToSuffixes->getValueAsString("lang");
- const ListInit* Suffixes = LangToSuffixes->getValueAsListInit("suffixes");
+ const std::string& Lang = InitPtrToString(d.getArg(0));
+ Init* Suffixes = d.getArg(1);
- for (unsigned i = 0; i < Suffixes->size(); ++i)
- O.indent(Indent1) << "langMap[\""
- << InitPtrToString(Suffixes->getElement(i))
- << "\"] = \"" << Lang << "\";\n";
+ // Second argument to lang_to_suffixes is either a single string...
+ if (typeid(*Suffixes) == typeid(StringInit)) {
+ O_.indent(Indent1) << "langMap[\"" << InitPtrToString(Suffixes)
+ << "\"] = \"" << Lang << "\";\n";
+ }
+ // ...or a list of strings.
+ else {
+ const ListInit& Lst = InitPtrToList(Suffixes);
+ assert(Lst.size() != 0);
+ for (ListInit::const_iterator B = Lst.begin(), E = Lst.end();
+ B != E; ++B) {
+ O_.indent(Indent1) << "langMap[\"" << InitPtrToString(*B)
+ << "\"] = \"" << Lang << "\";\n";
+ }
}
}
+};
+
+/// EmitPopulateLanguageMap - Emit the PopulateLanguageMap() function.
+void EmitPopulateLanguageMap (const RecordKeeper& Records, raw_ostream& O)
+{
+ O << "int PopulateLanguageMap (LanguageMap& langMap) {\n";
+
+ // For each LangMap:
+ const RecordVector& LangMaps =
+ Records.getAllDerivedDefinitions("LanguageMap");
+
+ for (RecordVector::const_iterator B = LangMaps.begin(),
+ E = LangMaps.end(); B!=E; ++B) {
+ ListInit* LangMap = (*B)->getValueAsListInit("map");
+ std::for_each(LangMap->begin(), LangMap->end(),
+ DoEmitPopulateLanguageMap(O));
+ }
+
+ O << '\n';
+ O.indent(Indent1) << "return 0;\n";
O << "}\n\n";
}
-/// IncDecWeight - Helper function passed to EmitCaseConstructHandler()
-/// by EmitEdgeClass().
-void IncDecWeight (const Init* i, unsigned IndentLevel,
- raw_ostream& O) {
+/// EmitEdgePropertyHandlerCallback - Emits code that handles edge
+/// properties. Helper function passed to EmitCaseConstructHandler() by
+/// EmitEdgeClass().
+void EmitEdgePropertyHandlerCallback (const Init* i, unsigned IndentLevel,
+ raw_ostream& O) {
const DagInit& d = InitPtrToDag(i);
const std::string& OpName = GetOperatorName(d);
if (OpName == "inc_weight") {
O.indent(IndentLevel) << "ret += ";
}
- else if (OpName == "dec_weight") {
- O.indent(IndentLevel) << "ret -= ";
- }
else if (OpName == "error") {
CheckNumberOfArguments(d, 1);
- O.indent(IndentLevel) << "throw std::runtime_error(\""
+ O.indent(IndentLevel) << "PrintError(\""
<< InitPtrToString(d.getArg(0))
<< "\");\n";
+ O.indent(IndentLevel) << "return -1;\n";
return;
}
else {
@@ -2646,7 +2710,7 @@ void IncDecWeight (const Init* i, unsigned IndentLevel,
/// EmitEdgeClass - Emit a single Edge# class.
void EmitEdgeClass (unsigned N, const std::string& Target,
- DagInit* Case, const OptionDescriptions& OptDescs,
+ const DagInit& Case, const OptionDescriptions& OptDescs,
raw_ostream& O) {
// Class constructor.
@@ -2657,40 +2721,48 @@ void EmitEdgeClass (unsigned N, const std::string& Target,
// Function Weight().
O.indent(Indent1)
- << "unsigned Weight(const InputLanguagesSet& InLangs) const {\n";
+ << "int Weight(const InputLanguagesSet& InLangs) const {\n";
O.indent(Indent2) << "unsigned ret = 0;\n";
// Handle the 'case' construct.
- EmitCaseConstructHandler(Case, Indent2, IncDecWeight, false, OptDescs, O);
+ EmitCaseConstructHandler(&Case, Indent2, EmitEdgePropertyHandlerCallback,
+ false, OptDescs, O);
O.indent(Indent2) << "return ret;\n";
O.indent(Indent1) << "}\n\n};\n\n";
}
/// EmitEdgeClasses - Emit Edge* classes that represent graph edges.
-void EmitEdgeClasses (const RecordVector& EdgeVector,
+void EmitEdgeClasses (const DagVector& EdgeVector,
const OptionDescriptions& OptDescs,
raw_ostream& O) {
int i = 0;
- for (RecordVector::const_iterator B = EdgeVector.begin(),
+ for (DagVector::const_iterator B = EdgeVector.begin(),
E = EdgeVector.end(); B != E; ++B) {
- const Record* Edge = *B;
- const std::string& NodeB = Edge->getValueAsString("b");
- DagInit& Weight = *Edge->getValueAsDag("weight");
+ const DagInit& Edge = **B;
+ const std::string& Name = GetOperatorName(Edge);
+
+ if (Name == "optional_edge") {
+ assert(IsOptionalEdge(Edge));
+ const std::string& NodeB = InitPtrToString(Edge.getArg(1));
+
+ const DagInit& Weight = InitPtrToDag(Edge.getArg(2));
+ EmitEdgeClass(i, NodeB, Weight, OptDescs, O);
+ }
+ else if (Name != "edge") {
+ throw "Unknown edge class: '" + Name + "'!";
+ }
- if (!IsDagEmpty(Weight))
- EmitEdgeClass(i, NodeB, &Weight, OptDescs, O);
++i;
}
}
-/// EmitPopulateCompilationGraph - Emit the PopulateCompilationGraphLocal()
-/// function.
-void EmitPopulateCompilationGraph (const RecordVector& EdgeVector,
+/// EmitPopulateCompilationGraph - Emit the PopulateCompilationGraph() function.
+void EmitPopulateCompilationGraph (const DagVector& EdgeVector,
const ToolDescriptions& ToolDescs,
raw_ostream& O)
{
- O << "void PopulateCompilationGraphLocal(CompilationGraph& G) {\n";
+ O << "int PopulateCompilationGraph (CompilationGraph& G) {\n";
for (ToolDescriptions::const_iterator B = ToolDescs.begin(),
E = ToolDescs.end(); B != E; ++B)
@@ -2701,24 +2773,27 @@ void EmitPopulateCompilationGraph (const RecordVector& EdgeVector,
// Insert edges.
int i = 0;
- for (RecordVector::const_iterator B = EdgeVector.begin(),
+ for (DagVector::const_iterator B = EdgeVector.begin(),
E = EdgeVector.end(); B != E; ++B) {
- const Record* Edge = *B;
- const std::string& NodeA = Edge->getValueAsString("a");
- const std::string& NodeB = Edge->getValueAsString("b");
- DagInit& Weight = *Edge->getValueAsDag("weight");
+ const DagInit& Edge = **B;
+ const std::string& NodeA = InitPtrToString(Edge.getArg(0));
+ const std::string& NodeB = InitPtrToString(Edge.getArg(1));
- O.indent(Indent1) << "G.insertEdge(\"" << NodeA << "\", ";
+ O.indent(Indent1) << "if (int ret = G.insertEdge(\"" << NodeA << "\", ";
- if (IsDagEmpty(Weight))
- O << "new SimpleEdge(\"" << NodeB << "\")";
- else
+ if (IsOptionalEdge(Edge))
O << "new Edge" << i << "()";
+ else
+ O << "new SimpleEdge(\"" << NodeB << "\")";
+
+ O << "))\n";
+ O.indent(Indent2) << "return ret;\n";
- O << ");\n";
++i;
}
+ O << '\n';
+ O.indent(Indent1) << "return 0;\n";
O << "}\n\n";
}
@@ -2762,7 +2837,8 @@ public:
CheckNumberOfArguments(Dag, 2);
const std::string& OptName = InitPtrToString(Dag.getArg(0));
const std::string& HookName = InitPtrToString(Dag.getArg(1));
- const OptionDescription& D = OptDescs_.FindOption(OptName);
+ const OptionDescription& D =
+ OptDescs_.FindParameterListOrParameter(OptName);
HookNames_[HookName] = HookInfo(D.isList() ? HookInfo::ListHook
: HookInfo::ArgHook);
@@ -2827,9 +2903,6 @@ public:
this->onCmdLine(InitPtrToString(Arg));
}
- void operator()(const DagInit* Test, unsigned, bool) {
- this->operator()(Test);
- }
void operator()(const Init* Statement, unsigned) {
this->operator()(Statement);
}
@@ -2873,7 +2946,6 @@ void EmitHookDeclarations(const ToolDescriptions& ToolDescs,
if (HookNames.empty())
return;
- O << "namespace hooks {\n";
for (HookInfoMap::const_iterator B = HookNames.begin(),
E = HookNames.end(); B != E; ++B) {
const char* HookName = B->first();
@@ -2892,23 +2964,6 @@ void EmitHookDeclarations(const ToolDescriptions& ToolDescs,
O <<");\n";
}
- O << "}\n\n";
-}
-
-/// EmitRegisterPlugin - Emit code to register this plugin.
-void EmitRegisterPlugin(int Priority, raw_ostream& O) {
- O << "struct Plugin : public llvmc::BasePlugin {\n\n";
- O.indent(Indent1) << "int Priority() const { return "
- << Priority << "; }\n\n";
- O.indent(Indent1) << "void PreprocessOptions() const\n";
- O.indent(Indent1) << "{ PreprocessOptionsLocal(); }\n\n";
- O.indent(Indent1) << "void PopulateLanguageMap(LanguageMap& langMap) const\n";
- O.indent(Indent1) << "{ PopulateLanguageMapLocal(langMap); }\n\n";
- O.indent(Indent1)
- << "void PopulateCompilationGraph(CompilationGraph& graph) const\n";
- O.indent(Indent1) << "{ PopulateCompilationGraphLocal(graph); }\n"
- << "};\n\n"
- << "static llvmc::RegisterPlugin<Plugin> RP;\n\n";
}
/// EmitIncludes - Emit necessary #include directives and some
@@ -2916,8 +2971,7 @@ void EmitRegisterPlugin(int Priority, raw_ostream& O) {
void EmitIncludes(raw_ostream& O) {
O << "#include \"llvm/CompilerDriver/BuiltinOptions.h\"\n"
<< "#include \"llvm/CompilerDriver/CompilationGraph.h\"\n"
- << "#include \"llvm/CompilerDriver/ForceLinkageMacros.h\"\n"
- << "#include \"llvm/CompilerDriver/Plugin.h\"\n"
+ << "#include \"llvm/CompilerDriver/Error.h\"\n"
<< "#include \"llvm/CompilerDriver/Tool.h\"\n\n"
<< "#include \"llvm/Support/CommandLine.h\"\n"
@@ -2931,21 +2985,17 @@ void EmitIncludes(raw_ostream& O) {
<< "using namespace llvm;\n"
<< "using namespace llvmc;\n\n"
- << "extern cl::opt<std::string> OutputFilename;\n\n"
-
<< "inline const char* checkCString(const char* s)\n"
<< "{ return s == NULL ? \"\" : s; }\n\n";
}
-/// PluginData - Holds all information about a plugin.
-struct PluginData {
+/// DriverData - Holds all information about the driver.
+struct DriverData {
OptionDescriptions OptDescs;
- bool HasSink;
- bool HasExterns;
ToolDescriptions ToolDescs;
- RecordVector Edges;
- int Priority;
+ DagVector Edges;
+ bool HasSink;
};
/// HasSink - Go through the list of tool descriptions and check if
@@ -2959,46 +3009,27 @@ bool HasSink(const ToolDescriptions& ToolDescs) {
return false;
}
-/// HasExterns - Go through the list of option descriptions and check
-/// if there are any external options.
-bool HasExterns(const OptionDescriptions& OptDescs) {
- for (OptionDescriptions::const_iterator B = OptDescs.begin(),
- E = OptDescs.end(); B != E; ++B)
- if (B->second.isExtern())
- return true;
-
- return false;
-}
-
-/// CollectPluginData - Collect tool and option properties,
-/// compilation graph edges and plugin priority from the parse tree.
-void CollectPluginData (const RecordKeeper& Records, PluginData& Data) {
+/// CollectDriverData - Collect compilation graph edges, tool properties and
+/// option properties from the parse tree.
+void CollectDriverData (const RecordKeeper& Records, DriverData& Data) {
// Collect option properties.
const RecordVector& OptionLists =
Records.getAllDerivedDefinitions("OptionList");
- CollectOptionDescriptions(OptionLists.begin(), OptionLists.end(),
- Data.OptDescs);
+ CollectOptionDescriptions(OptionLists, Data.OptDescs);
// Collect tool properties.
const RecordVector& Tools = Records.getAllDerivedDefinitions("Tool");
- CollectToolDescriptions(Tools.begin(), Tools.end(), Data.ToolDescs);
+ CollectToolDescriptions(Tools, Data.ToolDescs);
Data.HasSink = HasSink(Data.ToolDescs);
- Data.HasExterns = HasExterns(Data.OptDescs);
// Collect compilation graph edges.
const RecordVector& CompilationGraphs =
Records.getAllDerivedDefinitions("CompilationGraph");
- FillInEdgeVector(CompilationGraphs.begin(), CompilationGraphs.end(),
- Data.Edges);
-
- // Calculate the priority of this plugin.
- const RecordVector& Priorities =
- Records.getAllDerivedDefinitions("PluginPriority");
- Data.Priority = CalculatePriority(Priorities.begin(), Priorities.end());
+ FillInEdgeVector(CompilationGraphs, Data.Edges);
}
-/// CheckPluginData - Perform some sanity checks on the collected data.
-void CheckPluginData(PluginData& Data) {
+/// CheckDriverData - Perform some sanity checks on the collected data.
+void CheckDriverData(DriverData& Data) {
// Filter out all tools not mentioned in the compilation graph.
FilterNotInGraph(Data.Edges, Data.ToolDescs);
@@ -3010,24 +3041,24 @@ void CheckPluginData(PluginData& Data) {
CheckForSuperfluousOptions(Data.Edges, Data.ToolDescs, Data.OptDescs);
}
-void EmitPluginCode(const PluginData& Data, raw_ostream& O) {
+void EmitDriverCode(const DriverData& Data, raw_ostream& O) {
// Emit file header.
EmitIncludes(O);
// Emit global option registration code.
- EmitOptionDefinitions(Data.OptDescs, Data.HasSink, Data.HasExterns, O);
+ O << "namespace llvmc {\n"
+ << "namespace autogenerated {\n\n";
+ EmitOptionDefinitions(Data.OptDescs, Data.HasSink, O);
+ O << "} // End namespace autogenerated.\n"
+ << "} // End namespace llvmc.\n\n";
// Emit hook declarations.
+ O << "namespace hooks {\n";
EmitHookDeclarations(Data.ToolDescs, Data.OptDescs, O);
+ O << "} // End namespace hooks.\n\n";
O << "namespace {\n\n";
-
- // Emit PreprocessOptionsLocal() function.
- EmitPreprocessOptions(Records, Data.OptDescs, O);
-
- // Emit PopulateLanguageMapLocal() function
- // (language map maps from file extensions to language names).
- EmitPopulateLanguageMap(Records, O);
+ O << "using namespace llvmc::autogenerated;\n\n";
// Emit Tool classes.
for (ToolDescriptions::const_iterator B = Data.ToolDescs.begin(),
@@ -3037,18 +3068,23 @@ void EmitPluginCode(const PluginData& Data, raw_ostream& O) {
// Emit Edge# classes.
EmitEdgeClasses(Data.Edges, Data.OptDescs, O);
- // Emit PopulateCompilationGraphLocal() function.
- EmitPopulateCompilationGraph(Data.Edges, Data.ToolDescs, O);
-
- // Emit code for plugin registration.
- EmitRegisterPlugin(Data.Priority, O);
-
O << "} // End anonymous namespace.\n\n";
- // Force linkage magic.
O << "namespace llvmc {\n";
- O << "LLVMC_FORCE_LINKAGE_DECL(LLVMC_PLUGIN_NAME) {}\n";
- O << "}\n";
+ O << "namespace autogenerated {\n\n";
+
+ // Emit PreprocessOptions() function.
+ EmitPreprocessOptions(Records, Data.OptDescs, O);
+
+ // Emit PopulateLanguageMap() function
+ // (language map maps from file extensions to language names).
+ EmitPopulateLanguageMap(Records, O);
+
+ // Emit PopulateCompilationGraph() function.
+ EmitPopulateCompilationGraph(Data.Edges, Data.ToolDescs, O);
+
+ O << "} // End namespace autogenerated.\n";
+ O << "} // End namespace llvmc.\n\n";
// EOF
}
@@ -3060,13 +3096,13 @@ void EmitPluginCode(const PluginData& Data, raw_ostream& O) {
/// run - The back-end entry point.
void LLVMCConfigurationEmitter::run (raw_ostream &O) {
try {
- PluginData Data;
+ DriverData Data;
- CollectPluginData(Records, Data);
- CheckPluginData(Data);
+ CollectDriverData(Records, Data);
+ CheckDriverData(Data);
- this->EmitSourceFileHeader("LLVMC Configuration Library", O);
- EmitPluginCode(Data, O);
+ this->EmitSourceFileHeader("llvmc-based driver: auto-generated code", O);
+ EmitDriverCode(Data, O);
} catch (std::exception& Error) {
throw Error.what() + std::string(" - usually this means a syntax error.");
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 3516d3188555..0a12f3766699 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -145,6 +145,9 @@ static char ModType(const char mod, char type, bool &quad, bool &poly,
type = 'f';
usgn = false;
break;
+ case 'g':
+ quad = false;
+ break;
case 'w':
type = Widen(type);
quad = true;
@@ -686,15 +689,15 @@ static unsigned GetNeonEnum(const std::string &proto, StringRef typestr) {
bool cnst = false;
bool pntr = false;
- // base type to get the type string for.
+ // Base type to get the type string for.
char type = ClassifyType(typestr, quad, poly, usgn);
// Based on the modifying character, change the type and width if necessary.
type = ModType(mod, type, quad, poly, usgn, scal, cnst, pntr);
-
+
if (usgn)
ret |= 0x08;
- if (quad)
+ if (quad && proto[1] != 'g')
ret |= 0x10;
switch (type) {
@@ -1016,6 +1019,8 @@ static unsigned RangeFromType(StringRef typestr) {
throw "unhandled type!";
break;
}
+ assert(0 && "unreachable");
+ return 0;
}
/// runHeader - Emit a file with sections defining:
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index d2cf379907f6..dc793586fbee 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -628,23 +628,6 @@ std::string UnOpInit::getAsString() const {
return Result + "(" + LHS->getAsString() + ")";
}
-RecTy *UnOpInit::getFieldType(const std::string &FieldName) const {
- switch (getOpcode()) {
- default: assert(0 && "Unknown unop");
- case CAST: {
- RecordRecTy *RecordType = dynamic_cast<RecordRecTy *>(getType());
- if (RecordType) {
- RecordVal *Field = RecordType->getRecord()->getValue(FieldName);
- if (Field) {
- return Field->getType();
- }
- }
- break;
- }
- }
- return 0;
-}
-
Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) {
switch (getOpcode()) {
default: assert(0 && "Unknown binop");
@@ -1046,6 +1029,17 @@ std::string TernOpInit::getAsString() const {
+ RHS->getAsString() + ")";
}
+RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
+ RecordRecTy *RecordType = dynamic_cast<RecordRecTy *>(getType());
+ if (RecordType) {
+ RecordVal *Field = RecordType->getRecord()->getValue(FieldName);
+ if (Field) {
+ return Field->getType();
+ }
+ }
+ return 0;
+}
+
Init *TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) {
BitsRecTy *T = dynamic_cast<BitsRecTy*>(getType());
if (T == 0) return 0; // Cannot subscript a non-bits variable...
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 8f9fd950bb0f..d6f37eec749e 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -535,6 +535,12 @@ public:
virtual Init *convertInitializerBitRange(const std::vector<unsigned> &Bits);
virtual Init *convertInitListSlice(const std::vector<unsigned> &Elements);
+ /// getFieldType - This method is used to implement the FieldInit class.
+ /// Implementors of this method should return the type of the named field if
+ /// they are of record type.
+ ///
+ virtual RecTy *getFieldType(const std::string &FieldName) const;
+
/// resolveBitReference - This method is used to implement
/// VarBitInit::resolveReferences. If the bit is able to be resolved, we
/// simply return the resolved value, otherwise we return null.
@@ -835,12 +841,6 @@ public:
virtual Init *resolveReferences(Record &R, const RecordVal *RV);
- /// getFieldType - This method is used to implement the FieldInit class.
- /// Implementors of this method should return the type of the named field if
- /// they are of record type.
- ///
- virtual RecTy *getFieldType(const std::string &FieldName) const;
-
virtual std::string getAsString() const;
};
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index a3ca0bc5521a..6f06705243e9 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -119,16 +119,6 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
OS << "} // End llvm namespace \n";
}
-bool isSubRegisterClass(const CodeGenRegisterClass &RC,
- std::set<Record*> &RegSet) {
- for (unsigned i = 0, e = RC.Elements.size(); i != e; ++i) {
- Record *Reg = RC.Elements[i];
- if (!RegSet.count(Reg))
- return false;
- }
- return true;
-}
-
static void addSuperReg(Record *R, Record *S,
std::map<Record*, std::set<Record*>, LessRecord> &SubRegs,
std::map<Record*, std::set<Record*>, LessRecord> &SuperRegs,
@@ -498,12 +488,6 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
// Give the register class a legal C name if it's anonymous.
std::string Name = RC.TheDef->getName();
- std::set<Record*> RegSet;
- for (unsigned i = 0, e = RC.Elements.size(); i != e; ++i) {
- Record *Reg = RC.Elements[i];
- RegSet.insert(Reg);
- }
-
OS << " // " << Name
<< " Register Class sub-classes...\n"
<< " static const TargetRegisterClass* const "
@@ -513,21 +497,9 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
for (unsigned rc2 = 0, e2 = RegisterClasses.size(); rc2 != e2; ++rc2) {
const CodeGenRegisterClass &RC2 = RegisterClasses[rc2];
- // RC2 is a sub-class of RC if it is a valid replacement for any
- // instruction operand where an RC register is required. It must satisfy
- // these conditions:
- //
- // 1. All RC2 registers are also in RC.
- // 2. The RC2 spill size must not be smaller that the RC spill size.
- // 3. RC2 spill alignment must be compatible with RC.
- //
// Sub-classes are used to determine if a virtual register can be used
// as an instruction operand, or if it must be copied first.
-
- if (rc == rc2 || RC2.Elements.size() > RC.Elements.size() ||
- (RC.SpillAlignment && RC2.SpillAlignment % RC.SpillAlignment) ||
- RC.SpillSize > RC2.SpillSize || !isSubRegisterClass(RC2, RegSet))
- continue;
+ if (rc == rc2 || !RC.hasSubClass(&RC2)) continue;
if (!Empty) OS << ", ";
OS << "&" << getQualifiedName(RC2.TheDef) << "RegClass";
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 7a4f74ff6a9a..5e3e2829b87f 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -55,7 +55,10 @@ enum ActionType {
GenDisassembler,
GenCallingConv,
GenClangAttrClasses,
+ GenClangAttrImpl,
GenClangAttrList,
+ GenClangAttrPCHRead,
+ GenClangAttrPCHWrite,
GenClangDiagsDefs,
GenClangDiagGroups,
GenClangDeclNodes,
@@ -67,7 +70,7 @@ enum ActionType {
GenIntrinsic,
GenTgtIntrinsic,
GenLLVMCConf,
- GenEDHeader, GenEDInfo,
+ GenEDInfo,
GenArmNeon,
GenArmNeonSema,
PrintEnums
@@ -116,8 +119,14 @@ namespace {
"Generate target intrinsic information"),
clEnumValN(GenClangAttrClasses, "gen-clang-attr-classes",
"Generate clang attribute clases"),
+ clEnumValN(GenClangAttrImpl, "gen-clang-attr-impl",
+ "Generate clang attribute implementations"),
clEnumValN(GenClangAttrList, "gen-clang-attr-list",
"Generate a clang attribute list"),
+ clEnumValN(GenClangAttrPCHRead, "gen-clang-attr-pch-read",
+ "Generate clang PCH attribute reader"),
+ clEnumValN(GenClangAttrPCHWrite, "gen-clang-attr-pch-write",
+ "Generate clang PCH attribute writer"),
clEnumValN(GenClangDiagsDefs, "gen-clang-diags-defs",
"Generate Clang diagnostics definitions"),
clEnumValN(GenClangDiagGroups, "gen-clang-diag-groups",
@@ -128,8 +137,6 @@ namespace {
"Generate Clang AST statement nodes"),
clEnumValN(GenLLVMCConf, "gen-llvmc",
"Generate LLVMC configuration library"),
- clEnumValN(GenEDHeader, "gen-enhanced-disassembly-header",
- "Generate enhanced disassembly info header"),
clEnumValN(GenEDInfo, "gen-enhanced-disassembly-info",
"Generate enhanced disassembly info"),
clEnumValN(GenArmNeon, "gen-arm-neon",
@@ -209,116 +216,119 @@ int main(int argc, char **argv) {
return 1;
std::string Error;
- raw_fd_ostream Out(OutputFilename.c_str(), Error);
+ tool_output_file Out(OutputFilename.c_str(), Error);
if (!Error.empty()) {
errs() << argv[0] << ": error opening " << OutputFilename
<< ":" << Error << "\n";
return 1;
}
- // Make sure the file gets removed if *gasp* tablegen crashes...
- sys::RemoveFileOnSignal(sys::Path(OutputFilename));
-
try {
switch (Action) {
case PrintRecords:
- Out << Records; // No argument, dump all contents
+ Out.os() << Records; // No argument, dump all contents
break;
case GenEmitter:
- CodeEmitterGen(Records).run(Out);
+ CodeEmitterGen(Records).run(Out.os());
break;
case GenRegisterEnums:
- RegisterInfoEmitter(Records).runEnums(Out);
+ RegisterInfoEmitter(Records).runEnums(Out.os());
break;
case GenRegister:
- RegisterInfoEmitter(Records).run(Out);
+ RegisterInfoEmitter(Records).run(Out.os());
break;
case GenRegisterHeader:
- RegisterInfoEmitter(Records).runHeader(Out);
+ RegisterInfoEmitter(Records).runHeader(Out.os());
break;
case GenInstrEnums:
- InstrEnumEmitter(Records).run(Out);
+ InstrEnumEmitter(Records).run(Out.os());
break;
case GenInstrs:
- InstrInfoEmitter(Records).run(Out);
+ InstrInfoEmitter(Records).run(Out.os());
break;
case GenCallingConv:
- CallingConvEmitter(Records).run(Out);
+ CallingConvEmitter(Records).run(Out.os());
break;
case GenAsmWriter:
- AsmWriterEmitter(Records).run(Out);
+ AsmWriterEmitter(Records).run(Out.os());
break;
case GenARMDecoder:
- ARMDecoderEmitter(Records).run(Out);
+ ARMDecoderEmitter(Records).run(Out.os());
break;
case GenAsmMatcher:
- AsmMatcherEmitter(Records).run(Out);
+ AsmMatcherEmitter(Records).run(Out.os());
break;
case GenClangAttrClasses:
- ClangAttrClassEmitter(Records).run(Out);
+ ClangAttrClassEmitter(Records).run(Out.os());
+ break;
+ case GenClangAttrImpl:
+ ClangAttrImplEmitter(Records).run(Out.os());
break;
case GenClangAttrList:
- ClangAttrListEmitter(Records).run(Out);
+ ClangAttrListEmitter(Records).run(Out.os());
+ break;
+ case GenClangAttrPCHRead:
+ ClangAttrPCHReadEmitter(Records).run(Out.os());
+ break;
+ case GenClangAttrPCHWrite:
+ ClangAttrPCHWriteEmitter(Records).run(Out.os());
break;
case GenClangDiagsDefs:
- ClangDiagsDefsEmitter(Records, ClangComponent).run(Out);
+ ClangDiagsDefsEmitter(Records, ClangComponent).run(Out.os());
break;
case GenClangDiagGroups:
- ClangDiagGroupsEmitter(Records).run(Out);
+ ClangDiagGroupsEmitter(Records).run(Out.os());
break;
case GenClangDeclNodes:
- ClangASTNodesEmitter(Records, "Decl", "Decl").run(Out);
- ClangDeclContextEmitter(Records).run(Out);
+ ClangASTNodesEmitter(Records, "Decl", "Decl").run(Out.os());
+ ClangDeclContextEmitter(Records).run(Out.os());
break;
case GenClangStmtNodes:
- ClangASTNodesEmitter(Records, "Stmt", "").run(Out);
+ ClangASTNodesEmitter(Records, "Stmt", "").run(Out.os());
break;
case GenDisassembler:
- DisassemblerEmitter(Records).run(Out);
+ DisassemblerEmitter(Records).run(Out.os());
break;
case GenOptParserDefs:
- OptParserEmitter(Records, true).run(Out);
+ OptParserEmitter(Records, true).run(Out.os());
break;
case GenOptParserImpl:
- OptParserEmitter(Records, false).run(Out);
+ OptParserEmitter(Records, false).run(Out.os());
break;
case GenDAGISel:
- DAGISelEmitter(Records).run(Out);
+ DAGISelEmitter(Records).run(Out.os());
break;
case GenFastISel:
- FastISelEmitter(Records).run(Out);
+ FastISelEmitter(Records).run(Out.os());
break;
case GenSubtarget:
- SubtargetEmitter(Records).run(Out);
+ SubtargetEmitter(Records).run(Out.os());
break;
case GenIntrinsic:
- IntrinsicEmitter(Records).run(Out);
+ IntrinsicEmitter(Records).run(Out.os());
break;
case GenTgtIntrinsic:
- IntrinsicEmitter(Records, true).run(Out);
+ IntrinsicEmitter(Records, true).run(Out.os());
break;
case GenLLVMCConf:
- LLVMCConfigurationEmitter(Records).run(Out);
- break;
- case GenEDHeader:
- EDEmitter(Records).runHeader(Out);
+ LLVMCConfigurationEmitter(Records).run(Out.os());
break;
case GenEDInfo:
- EDEmitter(Records).run(Out);
+ EDEmitter(Records).run(Out.os());
break;
case GenArmNeon:
- NeonEmitter(Records).run(Out);
+ NeonEmitter(Records).run(Out.os());
break;
case GenArmNeonSema:
- NeonEmitter(Records).runHeader(Out);
+ NeonEmitter(Records).runHeader(Out.os());
break;
case PrintEnums:
{
std::vector<Record*> Recs = Records.getAllDerivedDefinitions(Class);
for (unsigned i = 0, e = Recs.size(); i != e; ++i)
- Out << Recs[i]->getName() << ", ";
- Out << "\n";
+ Out.os() << Recs[i]->getName() << ", ";
+ Out.os() << "\n";
break;
}
default:
@@ -326,6 +336,8 @@ int main(int argc, char **argv) {
return 1;
}
+ // Declare success.
+ Out.keep();
return 0;
} catch (const TGError &Error) {
@@ -340,7 +352,5 @@ int main(int argc, char **argv) {
errs() << argv[0] << ": Unknown unexpected exception occurred.\n";
}
- if (OutputFilename != "-")
- std::remove(OutputFilename.c_str()); // Remove the file, it's broken
return 1;
}
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index d17585f8f33b..54577e2ef538 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -49,8 +49,9 @@ endif
# Default to not install libLTO.dylib.
INSTALL_LIBLTO := no
-# Default to do a native build, not a cross-build for an ARM host.
+# Default to do a native build, not a cross-build for an ARM host or simulator.
ARM_HOSTED_BUILD := no
+IOS_SIM_BUILD := no
ifndef RC_ProjectSourceVersion
RC_ProjectSourceVersion = 9999
@@ -66,11 +67,18 @@ install: $(OBJROOT) $(SYMROOT) $(DSTROOT)
$(SRC)/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \
$(SRC) $(PREFIX) $(DSTROOT) $(SYMROOT) \
$(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) \
- $(ARM_HOSTED_BUILD) \
+ $(ARM_HOSTED_BUILD) $(IOS_SIM_BUILD) \
$(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion)
EmbeddedHosted:
- $(MAKE) ARM_HOSTED_BUILD=yes PREFIX=/usr install
+ $(MAKE) ARM_HOSTED_BUILD=yes PREFIX=/usr/local install
+
+# When building for the iOS simulator, MACOSX_DEPLOYMENT_TARGET is not set
+# by default, but it needs to be set when building tools that run on the host
+# (e.g., tblgen), so set it here.
+EmbeddedSim:
+ export MACOSX_DEPLOYMENT_TARGET=`sw_vers -productVersion`; \
+ $(MAKE) IOS_SIM_BUILD=yes PREFIX=$(SDKROOT)/usr/local install
# installhdrs does nothing, because the headers aren't useful until
# the compiler is installed.
@@ -120,4 +128,4 @@ clean:
$(OBJROOT) $(SYMROOT) $(DSTROOT):
mkdir -p $@
-.PHONY: install installsrc clean EmbeddedHosted
+.PHONY: install installsrc clean EmbeddedHosted EmbeddedSim
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 37ef16e6df63..39ec1ccda9c0 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -49,11 +49,14 @@ INSTALL_LIBLTO="$9"
# A yes/no parameter that controls whether to cross-build for an ARM host.
ARM_HOSTED_BUILD="${10}"
+# A yes/no parameter that controls whether to cross-build for the iOS simulator
+IOS_SIM_BUILD="${11}"
+
# The version number of the submission, e.g. 1007.
-LLVM_SUBMIT_VERSION="${11}"
+LLVM_SUBMIT_VERSION="${12}"
# The subversion number of the submission, e.g. 03.
-LLVM_SUBMIT_SUBVERSION="${12}"
+LLVM_SUBMIT_SUBVERSION="${13}"
# The current working directory is where the build will happen. It may already
# contain a partial result of an interrupted build, in which case this script
@@ -97,7 +100,7 @@ if [ "$ARM_HOSTED_BUILD" = yes ]; then
# Try to use the platform llvm-gcc. Fall back to gcc if it's not available.
for prog in gcc g++ ; do
P=$DIR/bin/arm-apple-darwin$DARWIN_VERS-${prog}
- T=`xcrun -find llvm-${prog}`
+ T=`xcrun -sdk $SDKROOT -find llvm-${prog}`
if [ "x$T" = "x" ] ; then
T=`xcrun -sdk $SDKROOT -find ${prog}`
fi
@@ -124,6 +127,10 @@ fi
if [ "$ARM_HOSTED_BUILD" = yes ]; then
configure_opts="--enable-targets=arm --host=arm-apple-darwin10 \
--target=arm-apple-darwin10 --build=i686-apple-darwin10"
+elif [ "$IOS_SIM_BUILD" = yes ]; then
+ # Use a non-standard "darwin_sim" host triple to trigger a cross-build.
+ configure_opts="--enable-targets=x86 --host=i686-apple-darwin_sim \
+ --build=i686-apple-darwin10"
else
configure_opts="--enable-targets=arm,x86,powerpc,cbe"
fi
@@ -317,9 +324,20 @@ if [ "$INSTALL_LIBLTO" = "yes" ]; then
mkdir -p $DT_HOME/lib
mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
+ # Save a copy of the unstripped dylib
+ mkdir -p $SYM_DIR/Developer/usr/lib
+ cp $DT_HOME/lib/libLTO.dylib $SYM_DIR/Developer/usr/lib/libLTO.dylib
+
# Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
# PPC objects!
strip -arch all -Sl $DT_HOME/lib/libLTO.dylib
+
+ if [ "x$DISABLE_USR_LINKS" == "x" ]; then
+ # Add a symlink in /usr/lib for B&I.
+ mkdir -p $DEST_DIR/usr/lib/
+ (cd $DEST_DIR/usr/lib && \
+ ln -s ../../Developer/usr/lib/libLTO.dylib ./libLTO.dylib)
+ fi
else
rm -f lib/libLTO.dylib
fi
@@ -350,15 +368,6 @@ chgrp -R wheel $DEST_DIR
rm -rf $DEST_DIR$DEST_ROOT/docs
################################################################################
-# symlinks so that B&I can find things
-
-if [ "$INSTALL_LIBLTO" = "yes" ]; then
- mkdir -p $DEST_DIR/usr/lib/
- cd $DEST_DIR/usr/lib && \
- ln -s ../../Developer/usr/lib/libLTO.dylib ./libLTO.dylib
-fi
-
-################################################################################
# w00t! Done!
exit 0
diff --git a/utils/lit/lit/ExampleTests/lit.cfg b/utils/lit/lit/ExampleTests/lit.cfg
index dbd574f8bd10..20ee37dcef27 100644
--- a/utils/lit/lit/ExampleTests/lit.cfg
+++ b/utils/lit/lit/ExampleTests/lit.cfg
@@ -21,3 +21,6 @@ config.test_exec_root = None
# target_triple: Used by ShTest and TclTest formats for XFAIL checks.
config.target_triple = 'foo'
+
+# available_features: Used by ShTest and TclTest formats for REQUIRES checks.
+config.available_features = ['some-feature-name']
diff --git a/utils/lit/lit/ExampleTests/required-and-missing.c b/utils/lit/lit/ExampleTests/required-and-missing.c
new file mode 100644
index 000000000000..47ba72e4a314
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/required-and-missing.c
@@ -0,0 +1,4 @@
+// This test shouldn't be run, the required feature is missing.
+//
+// RUN: false
+// REQUIRES: some-missing-feature-name
diff --git a/utils/lit/lit/ExampleTests/required-and-present.c b/utils/lit/lit/ExampleTests/required-and-present.c
new file mode 100644
index 000000000000..2a09e08e5ae9
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/required-and-present.c
@@ -0,0 +1,2 @@
+// RUN: true
+// REQUIRES: some-feature-name
diff --git a/utils/lit/lit/TestFormats.py b/utils/lit/lit/TestFormats.py
index e52d0e4e1c7c..7ffbd2bf7663 100644
--- a/utils/lit/lit/TestFormats.py
+++ b/utils/lit/lit/TestFormats.py
@@ -1,14 +1,21 @@
import os
+import platform
import Test
import TestRunner
import Util
+kIsWindows = platform.system() == 'Windows'
+
class GoogleTest(object):
def __init__(self, test_sub_dir, test_suffix):
self.test_sub_dir = str(test_sub_dir)
self.test_suffix = str(test_suffix)
+ # On Windows, assume tests will also end in '.exe'.
+ if kIsWindows:
+ self.test_suffix += '.exe'
+
def getGTestTests(self, path, litConfig, localConfig):
"""getGTestTests(path) - [name]
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index cdf1c938af8c..0eb51a829408 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -312,11 +312,6 @@ def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd):
out,err,exitCode = executeCommand(command, cwd=cwd,
env=test.config.environment)
- # Tcl commands fail on standard error output.
- if err:
- exitCode = 1
- out = 'Command has output on stderr!\n\n' + out
-
return out,err,exitCode
else:
results = []
@@ -328,11 +323,6 @@ def executeTclScriptInternal(test, litConfig, tmpBase, commands, cwd):
out = err = ''
- # Tcl commands fail on standard error output.
- if [True for _,_,err,res in results if err]:
- exitCode = 1
- out += 'Command has output on stderr!\n\n'
-
for i,(cmd, cmd_out, cmd_err, res) in enumerate(results):
out += 'Command %d: %s\n' % (i, ' '.join('"%s"' % s for s in cmd.args))
out += 'Command %d Result: %r\n' % (i, res)
@@ -422,6 +412,7 @@ def parseIntegratedTestScript(test, normalize_slashes=False):
script = []
xfails = []
xtargets = []
+ requires = []
for ln in open(sourcepath):
if 'RUN:' in ln:
# Isolate the command to run.
@@ -442,6 +433,9 @@ def parseIntegratedTestScript(test, normalize_slashes=False):
elif 'XTARGET:' in ln:
items = ln[ln.index('XTARGET:') + 8:].split(',')
xtargets.extend([s.strip() for s in items])
+ elif 'REQUIRES:' in ln:
+ items = ln[ln.index('REQUIRES:') + 9:].split(',')
+ requires.extend([s.strip() for s in items])
elif 'END.' in ln:
# Check for END. lines.
if ln[ln.index('END.'):].strip() == 'END.':
@@ -461,27 +455,42 @@ def parseIntegratedTestScript(test, normalize_slashes=False):
if not script:
return (Test.UNRESOLVED, "Test has no run line!")
+ # Check for unterminated run lines.
if script[-1][-1] == '\\':
return (Test.UNRESOLVED, "Test has unterminated run lines (with '\\')")
+ # Check that we have the required features:
+ missing_required_features = [f for f in requires
+ if f not in test.config.available_features]
+ if missing_required_features:
+ msg = ', '.join(missing_required_features)
+ return (Test.UNSUPPORTED,
+ "Test requires the following features: %s" % msg)
+
isXFail = isExpectedFail(xfails, xtargets, test.suite.config.target_triple)
return script,isXFail,tmpBase,execdir
-def formatTestOutput(status, out, err, exitCode, script):
+def formatTestOutput(status, out, err, exitCode, failDueToStderr, script):
output = StringIO.StringIO()
print >>output, "Script:"
print >>output, "--"
print >>output, '\n'.join(script)
print >>output, "--"
- print >>output, "Exit Code: %r" % exitCode
- print >>output, "Command Output (stdout):"
- print >>output, "--"
- output.write(out)
- print >>output, "--"
- print >>output, "Command Output (stderr):"
- print >>output, "--"
- output.write(err)
- print >>output, "--"
+ print >>output, "Exit Code: %r" % exitCode,
+ if failDueToStderr:
+ print >>output, "(but there was output on stderr)"
+ else:
+ print >>output
+ if out:
+ print >>output, "Command Output (stdout):"
+ print >>output, "--"
+ output.write(out)
+ print >>output, "--"
+ if err:
+ print >>output, "Command Output (stderr):"
+ print >>output, "--"
+ output.write(err)
+ print >>output, "--"
return (status, output.getvalue())
def executeTclTest(test, litConfig):
@@ -506,18 +515,30 @@ def executeTclTest(test, litConfig):
if len(res) == 2:
return res
+ # Test for failure. In addition to the exit code, Tcl commands are
+ # considered to fail if there is any standard error output.
out,err,exitCode = res
if isXFail:
- ok = exitCode != 0
- status = (Test.XPASS, Test.XFAIL)[ok]
+ ok = exitCode != 0 or err
+ if ok:
+ status = Test.XFAIL
+ else:
+ status = Test.XPASS
else:
- ok = exitCode == 0
- status = (Test.FAIL, Test.PASS)[ok]
+ ok = exitCode == 0 and not err
+ if ok:
+ status = Test.PASS
+ else:
+ status = Test.FAIL
if ok:
return (status,'')
- return formatTestOutput(status, out, err, exitCode, script)
+ # Set a flag for formatTestOutput so it can explain why the test was
+ # considered to have failed, despite having an exit code of 0.
+ failDueToStderr = exitCode == 0 and err
+
+ return formatTestOutput(status, out, err, exitCode, failDueToStderr, script)
def executeShTest(test, litConfig, useExternalSh):
if test.config.unsupported:
@@ -545,12 +566,21 @@ def executeShTest(test, litConfig, useExternalSh):
out,err,exitCode = res
if isXFail:
ok = exitCode != 0
- status = (Test.XPASS, Test.XFAIL)[ok]
+ if ok:
+ status = Test.XFAIL
+ else:
+ status = Test.XPASS
else:
ok = exitCode == 0
- status = (Test.FAIL, Test.PASS)[ok]
+ if ok:
+ status = Test.PASS
+ else:
+ status = Test.FAIL
if ok:
return (status,'')
- return formatTestOutput(status, out, err, exitCode, script)
+ # Sh tests are not considered to fail just from stderr output.
+ failDueToStderr = False
+
+ return formatTestOutput(status, out, err, exitCode, failDueToStderr, script)
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index dd905ef3ee10..5c1b27394857 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -28,7 +28,8 @@ class TestingConfig:
on_clone = None,
test_exec_root = None,
test_source_root = None,
- excludes = [])
+ excludes = [],
+ available_features = [])
if os.path.exists(path):
# FIXME: Improve detection and error reporting of errors in the
@@ -54,7 +55,8 @@ class TestingConfig:
def __init__(self, parent, name, suffixes, test_format,
environment, substitutions, unsupported, on_clone,
- test_exec_root, test_source_root, excludes):
+ test_exec_root, test_source_root, excludes,
+ available_features):
self.parent = parent
self.name = str(name)
self.suffixes = set(suffixes)
@@ -66,6 +68,7 @@ class TestingConfig:
self.test_exec_root = test_exec_root
self.test_source_root = test_source_root
self.excludes = set(excludes)
+ self.available_features = set(available_features)
def clone(self, path):
# FIXME: Chain implementations?
@@ -75,7 +78,7 @@ class TestingConfig:
self.environment, self.substitutions,
self.unsupported, self.on_clone,
self.test_exec_root, self.test_source_root,
- self.excludes)
+ self.excludes, self.available_features)
if cfg.on_clone:
cfg.on_clone(self, cfg, path)
return cfg
diff --git a/utils/lit/lit/lit.py b/utils/lit/lit/lit.py
index db0653f7966d..13d263009ddd 100755
--- a/utils/lit/lit/lit.py
+++ b/utils/lit/lit/lit.py
@@ -358,8 +358,7 @@ def load_test_suite(inputs):
from LitTestCase import LitTestCase
return unittest.TestSuite([LitTestCase(test, litConfig) for test in tests])
-def main():
- # Bump the GIL check interval, its more important to get any one thread to a
+def main(builtinParameters = {}): # Bump the GIL check interval, its more important to get any one thread to a
# blocking operation (hopefully exec) than to try and unblock other threads.
#
# FIXME: This is a hack.
@@ -469,7 +468,7 @@ def main():
inputs = args
# Create the user defined parameters.
- userParams = {}
+ userParams = dict(builtinParameters)
for entry in opts.userParameters:
if '=' not in entry:
name,val = entry,''
diff --git a/utils/llvm-lit/Makefile b/utils/llvm-lit/Makefile
new file mode 100644
index 000000000000..702591f49591
--- /dev/null
+++ b/utils/llvm-lit/Makefile
@@ -0,0 +1,21 @@
+##===- utils/llvm-lit/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+
+include $(LEVEL)/Makefile.common
+
+all:: $(ToolDir)/llvm-lit
+
+$(ToolDir)/llvm-lit: llvm-lit.in $(ToolDir)/.dir
+ $(Echo) "Creating 'llvm-lit' script..."
+ $(Verb)sed -e "s#@LLVM_SOURCE_DIR@#$(LLVM_SRC_ROOT)#g" \
+ -e "s#@LLVM_BINARY_DIR@#$(LLVM_OBJ_ROOT)#g" \
+ $< > $@
+ $(Verb)chmod +x $@
diff --git a/utils/llvm-lit/llvm-lit.in b/utils/llvm-lit/llvm-lit.in
new file mode 100644
index 000000000000..3ff2c2489c45
--- /dev/null
+++ b/utils/llvm-lit/llvm-lit.in
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+
+import os
+import sys
+
+# Variables configured at build time.
+llvm_source_root = "@LLVM_SOURCE_DIR@"
+llvm_obj_root = "@LLVM_BINARY_DIR@"
+
+# Make sure we can find the lit package.
+sys.path.append(os.path.join(llvm_source_root, 'utils', 'lit'))
+
+# Set up some builtin parameters, so that by default the LLVM test suite
+# configuration file knows how to find the object tree.
+builtin_parameters = {
+ 'llvm_site_config' : os.path.join(llvm_obj_root, 'test', 'lit.site.cfg')
+ }
+
+if __name__=='__main__':
+ import lit
+ lit.main(builtin_parameters)
diff --git a/utils/llvm.grm b/utils/llvm.grm
index fa0dcd1e1520..9d6bdf79f539 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -8,6 +8,8 @@ It is strictly syntax-based, and makes no attempt to generate
IR that is semantically valid. Most of the IR produced doesn't
pass the Verifier.
+TODO: Metadata, in all its forms
+
*)
I ::= "title: LLVM assembly language\n"
@@ -90,6 +92,8 @@ GVInternalLinkage
| dllexport
| common
| private
+ | "linker_private"
+ | "linker_private_weak"
;
GVExternalLinkage
diff --git a/utils/llvmdo b/utils/llvmdo
index 4a7e05af9915..bcfc221c2b1b 100755
--- a/utils/llvmdo
+++ b/utils/llvmdo
@@ -76,8 +76,6 @@ fi
shift;
paths_to_ignore="\
- -path */CVS -o \
- -path */CVS/* -o \
-path */.svn/ -o \
-path */.svn/* -o \
-path docs/doxygen/* -o \
@@ -130,7 +128,6 @@ files_to_match="\
-o -name llvmgrep \
-o -name check-each-file \
-o -name codgen-diff \
- -o -name cvsupdate \
-o -name llvm-native-gcc \
-o -name llvm-native-gxx \
-o -name makellvm \
@@ -153,7 +150,6 @@ files_to_ignore="\
-name \.* \
-o -name *~ \
-o -name #* \
- -o -name *.cvs \
-o -name configure \
-o -name slow.ll \
-o -name *libtool* \
diff --git a/utils/mkpatch b/utils/mkpatch
deleted file mode 100755
index 2741563f041c..000000000000
--- a/utils/mkpatch
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-#
-# This script makes a patch for LLVM ensuring the correct diff options and
-# putting the files in a standard review order.
-
-
-function error {
- retcode="$?"
- echo "mkpatch: error: $1 ($retcode)"
- exit 1
-}
-
-if [ ! -e llvm.spec.in ] ; then
- error "Please change directory to the LLVM top source directory"
-fi
-if [ "$#" -ne 1 ] ; then
- error "usage: utils/mkpatch [PATCH_NAME]"
-fi
-NAME="$1"
-echo "mkpatch: Generating differences on top level files"
-svn diff -N -x -u > "$NAME".patch.raw 2>&1
-echo "mkpatch: Generating differences on all directories"
-svn diff -x -u >> "$NAME".patch.raw 2>&1 \
- autoconf docs utils include lib/System lib/Support lib/VMCore lib/AsmParser \
- lib/Bitcode lib/Analysis lib/Transforms lib/CodeGen lib/Target \
- lib/ExecutionEngine lib/Linker lib/MC \
- tools test unittests runtime projects examples Xcode
-
-echo "mkpatch: Removing cruft from the patch file"
-sed -e '/^[?] .*/d' -e '/^cvs diff: Diffing/d' "$NAME".patch.raw | awk '\
-BEGIN { deleting = 0; } \
-/^Index: .*[.]cvs$/ { deleting = 1; fname=substr($0,7); \
- print "Skipping: ", fname > "/dev/stderr"; } \
-/^Index:.*/ && !/^Index: .*[.]cvs$/ { deleting = 0; } \
-{ if (! deleting) { print; } } ' > "$NAME".patch || \
- error "sed/awk cleanup failed"
-
diff --git a/utils/userloc.pl b/utils/userloc.pl
deleted file mode 100755
index 4da2f4029250..000000000000
--- a/utils/userloc.pl
+++ /dev/null
@@ -1,216 +0,0 @@
-#!/usr/bin/perl -w
-#
-# Program: userloc.pl
-#
-# Synopsis: This program uses "cvs annotate" to get a summary of how many lines
-# of code the various developres are responsible for. It takes one
-# argument, the directory to process. If the argument is not specified
-# then the cwd is used. The directory must be an LLVM tree checked out
-# from cvs.
-#
-# Syntax: userloc.pl [-tag=tag|-html... <directory>...
-#
-# Options:
-# -tag=tag
-# Use "tag" to select the revision (as per cvs -r option)
-# -filedetails
-# Report details about lines of code in each file for each user
-# -html
-# Generate HTML output instead of text output
-# -topdir
-# Specify where the top llvm source directory is. Otherwise the
-# llvm-config tool is used to find it.
-# Directories:
-# The directories passed after the options should be relative paths to
-# directories of interest from the top of the llvm source tree, e.g. "lib"
-# or "include", etc.
-
-die "Usage userloc.pl [-tag=tag|-html] <directories>..."
- if ($#ARGV < 0);
-
-my $tag = "";
-my $html = 0;
-my $debug = 0;
-my $filedetails = "";
-my $srcroot = "";
-while ( defined($ARGV[0]) && substr($ARGV[0],0,1) eq '-' )
-{
- if ($ARGV[0] =~ /-tag=.*/) {
- $tag = $ARGV[0];
- $tag =~ s#-tag=(.*)#$1#;
- } elsif ($ARGV[0] =~ /-filedetails/) {
- $filedetails = 1;
- } elsif ($ARGV[0] eq "-html") {
- $html = 1;
- } elsif ($ARGV[0] eq "-debug") {
- $debug = 1;
- } elsif ($ARGV[0] eq "-topdir") {
- shift; $srcroot = $ARGV[0]; shift;
- } else {
- die "Invalid option: $ARGV[0]";
- }
- shift;
-}
-
-if (length($srcroot) == 0) {
- chomp($srcroot = `llvm-config --src-root`);
-}
-if (! -d "$srcroot") {
- die "Invalid source root: $srcroot\n";
-}
-chdir($srcroot);
-my $llvmdo = "$srcroot/utils/llvmdo -topdir '$srcroot'";
-my %Stats;
-my %FileStats;
-
-my $annotate = "cvs -z6 annotate -lf ";
-if (length($tag) > 0)
-{
- $annotate = $annotate . " -r" . $tag;
-}
-
-sub GetCVSFiles
-{
- my $d = $_[0];
- my $files ="";
- open FILELIST,
- "$llvmdo -dirs \"$d\" -code-only echo |" || die "Can't get list of files with llvmdo";
- while ( defined($line = <FILELIST>) ) {
- chomp($file = $line);
- print "File: $file\n" if ($debug);
- $files = "$files $file";
- }
- return $files;
-}
-
-sub ScanDir
-{
- my $Dir = $_[0];
- my $files = GetCVSFiles($Dir);
-
- open (DATA,"$annotate $files 2>&1 |")
- || die "Can't read cvs annotation data";
-
- my $curfile = "";
- while ( defined($line = <DATA>) )
- {
- chomp($line);
- if ($line =~ '^Annotations for.*') {
- $curfile = $line;
- $curfile =~ s#^Annotations for ([[:print:]]*)#$1#;
- print "Scanning: $curfile\n" if ($debug);
- } elsif ($line =~ /^[0-9.]*[ \t]*\([^)]*\):/) {
- $uname = $line;
- $uname =~ s#^[0-9.]*[ \t]*\(([a-zA-Z0-9_.-]*) [^)]*\):.*#$1#;
- $Stats{$uname}++;
- if ($filedetails) {
- $FileStats{$uname} = {} unless exists $FileStats{$uname};
- ${$FileStats{$uname}}{$curfile}++;
- }
- }
- }
- close DATA;
-}
-
-sub printStats
-{
- my $dir = $_[0];
- my $hash = $_[1];
- my $user;
- my $total = 0;
-
- foreach $user (keys %Stats) { $total += $Stats{$user}; }
-
- if ($html) {
- print "<p>Total Source Lines: $total<br/></p>\n";
- print "<table>";
- print " <tr><th style=\"text-align:right\">LOC</th>\n";
- print " <th style=\"text-align:right\">\%LOC</th>\n";
- print " <th style=\"text-align:left\">User</th>\n";
- print "</tr>\n";
- }
-
- foreach $user ( sort keys %Stats )
- {
- my $v = $Stats{$user};
- if (defined($v))
- {
- if ($html) {
- printf "<tr><td style=\"text-align:right\">%d</td><td style=\"text-align:right\">(%4.1f%%)</td><td style=\"text-align:left\">", $v, (100.0/$total)*$v;
- if ($filedetails) {
- print "<a href=\"#$user\">$user</a></td></tr>";
- } else {
- print $user,"</td></tr>";
- }
- } else {
- printf "%8d (%4.1f%%) %s\n", $v, (100.0/$total)*$v, $user;
- }
- }
- }
- print "</table>\n" if ($html);
-
- if ($filedetails) {
- foreach $user (sort keys %FileStats) {
- my $total = 0;
- foreach $file (sort keys %{$FileStats{$user}}) {
- $total += ${$FileStats{$user}}{$file}
- }
- if ($html) {
- print "<table><tr><th style=\"text-align:left\" colspan=\"3\"><a name=\"$user\">$user</a></th></tr>\n";
- } else {
- print $user,":\n";
- }
- foreach $file (sort keys %{$FileStats{$user}}) {
- my $v = ${$FileStats{$user}}{$file};
- if ($html) {
- printf "<tr><td style=\"text-align:right\">&nbsp;&nbsp;%d</td><td
- style=\"text-align:right\">&nbsp;%4.1f%%</td><td
- style=\"text-align:left\">%s</td></tr>",$v, (100.0/$total)*$v,$file;
- } else {
- printf "%8d (%4.1f%%) %s\n", $v, (100.0/$total)*$v, $file;
- }
- }
- if ($html) { print "</table>\n"; }
- }
- }
-}
-
-
-if ($html)
-{
-print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">\n";
-print "<html>\n<head>\n";
-print " <title>LLVM LOC Based On CVS Annotation</title>\n";
-print " <link rel=\"stylesheet\" href=\"llvm.css\" type=\"text/css\"/>\n";
-print "</head>\n";
-print "<body><div class=\"doc_title\">LLVM LOC Based On CVS Annotation</div>\n";
-print "<p>This document shows the total lines of code per user in each\n";
-print "LLVM directory. Lines of code are attributed by the user that last\n";
-print "committed the line. This does not necessarily reflect authorship.</p>\n";
-}
-
-my @DIRS;
-if ($#ARGV > 0) {
- @DIRS = @ARGV;
-} else {
- push @DIRS, 'include';
- push @DIRS, 'lib';
- push @DIRS, 'tools';
- push @DIRS, 'runtime';
- push @DIRS, 'docs';
- push @DIRS, 'test';
- push @DIRS, 'utils';
- push @DIRS, 'examples';
- push @DIRS, 'projects/Stacker';
- push @DIRS, 'projects/sample';
- push @DIRS, 'autoconf';
-}
-
-for $Index ( 0 .. $#DIRS) {
- print "Scanning Dir: $DIRS[$Index]\n" if ($debug);
- ScanDir($DIRS[$Index]);
-}
-
-printStats;
-
-print "</body></html>\n" if ($html) ;
diff --git a/utils/valgrind/i386-pc-linux-gnu.supp b/utils/valgrind/i386-pc-linux-gnu.supp
index f8fd99a7f63d..c9f68a0ab66c 100644
--- a/utils/valgrind/i386-pc-linux-gnu.supp
+++ b/utils/valgrind/i386-pc-linux-gnu.supp
@@ -5,3 +5,37 @@
fun:_ZN83_GLOBAL_*PassRegistrar12RegisterPassERKN4llvm8PassInfoE
fun:_ZN4llvm8PassInfo12registerPassEv
}
+
+# Python false positives according to
+# http://svn.python.org/projects/python/trunk/Misc/README.valgrind
+
+{
+ ADDRESS_IN_RANGE/Invalid read of size 4
+ Memcheck:Addr4
+ obj:/usr/bin/python2.5
+}
+
+{
+ ADDRESS_IN_RANGE/Invalid read of size 4
+ Memcheck:Value4
+ obj:/usr/bin/python2.5
+}
+
+{
+ ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value
+ Memcheck:Cond
+ obj:/usr/bin/python2.5
+}
+
+{
+ We don't care if as leaks
+ Memcheck:Leak
+ obj:/usr/bin/as
+}
+
+{
+ We don't care if python leaks
+ Memcheck:Leak
+ fun:malloc
+ obj:/usr/bin/python2.5
+}
diff --git a/utils/valgrind/x86_64-pc-linux-gnu.supp b/utils/valgrind/x86_64-pc-linux-gnu.supp
index f8fd99a7f63d..f5aae990f697 100644
--- a/utils/valgrind/x86_64-pc-linux-gnu.supp
+++ b/utils/valgrind/x86_64-pc-linux-gnu.supp
@@ -2,6 +2,45 @@
False leak under RegisterPass
Memcheck:Leak
...
- fun:_ZN83_GLOBAL_*PassRegistrar12RegisterPassERKN4llvm8PassInfoE
- fun:_ZN4llvm8PassInfo12registerPassEv
+ fun:_ZN4llvm12PassRegistry12registerPassERKNS_8PassInfoE
+}
+
+# Python false positives according to
+# http://svn.python.org/projects/python/trunk/Misc/README.valgrind
+
+{
+ ADDRESS_IN_RANGE/Invalid read of size 4
+ Memcheck:Addr4
+ obj:/usr/bin/python2.5
+}
+
+{
+ ADDRESS_IN_RANGE/Invalid read of size 4
+ Memcheck:Value8
+ obj:/usr/bin/python2.5
+}
+
+{
+ ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value
+ Memcheck:Cond
+ obj:/usr/bin/python2.5
+}
+
+{
+ We don't care if as leaks
+ Memcheck:Leak
+ obj:/usr/bin/as
+}
+
+{
+ We don't care if grep leaks
+ Memcheck:Leak
+ obj:/bin/grep
+}
+
+{
+ We don't care if python leaks
+ Memcheck:Leak
+ fun:malloc
+ obj:/usr/bin/python2.5
}
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index 518aa041b8b9..acebc20bc344 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -1,7 +1,7 @@
" Vim syntax file
" Language: llvm
" Maintainer: The LLVM team, http://llvm.org/
-" Version: $Revision: 97271 $
+" Version: $Revision: 112382 $
if version < 600
syntax clear
@@ -57,14 +57,12 @@ syn keyword llvmKeyword module asm align tail to
syn keyword llvmKeyword addrspace section alias sideeffect c gc
syn keyword llvmKeyword target datalayout triple
syn keyword llvmKeyword blockaddress
-syn keyword llvmKeyword union
" Obsolete keywords.
-syn keyword llvmError uninitialized implementation
-syn keyword llvmError getresult big little endian begin end
+syn keyword llvmError getresult begin end
" Misc syntax.
-syn match llvmIgnore /[%@]\d\+\>/
+syn match llvmNoName /[%@]\d\+\>/
syn match llvmNumber /-\?\<\d\+\>/
syn match llvmFloat /-\?\<\d\+\.\d*\(e[+-]\d\+\)\?\>/
syn match llvmFloat /\<0x\x\+\>/
@@ -99,7 +97,7 @@ if version >= 508 || !exists("did_c_syn_inits")
HiLink llvmKeyword Keyword
HiLink llvmBoolean Boolean
HiLink llvmFloat Float
- HiLink llvmIgnore Ignore
+ HiLink llvmNoName Identifier
HiLink llvmConstant Constant
HiLink llvmSpecialComment SpecialComment
HiLink llvmError Error
diff --git a/utils/vim/vimrc b/utils/vim/vimrc
index 63108f26d0fc..1f314c2e3f37 100644
--- a/utils/vim/vimrc
+++ b/utils/vim/vimrc
@@ -1,5 +1,5 @@
" LLVM coding guidelines conformance for VIM
-" $Revision: 97273 $
+" $Revision: 112982 $
"
" Maintainer: The LLVM Team, http://llvm.org
" WARNING: Read before you source in all these commands and macros! Some
@@ -91,3 +91,130 @@ augroup END
"set showmode
"set incsearch
"set ruler
+
+" Clang code-completion support. This is highly experimental!
+
+" A path to a clang executable.
+let g:clang_path = "clang++"
+
+" A list of options to add to the clang commandline, for example to add
+" include paths, predefined macros, and language options.
+let g:clang_opts = [
+ \ "-x","c++",
+ \ "-D__STDC_LIMIT_MACROS=1","-D__STDC_CONSTANT_MACROS=1",
+ \ "-Iinclude" ]
+
+function! ClangComplete(findstart, base)
+ if a:findstart == 1
+ " In findstart mode, look for the beginning of the current identifier.
+ let l:line = getline('.')
+ let l:start = col('.') - 1
+ while l:start > 0 && l:line[l:start - 1] =~ '\i'
+ let l:start -= 1
+ endwhile
+ return l:start
+ endif
+
+ " Get the current line and column numbers.
+ let l:l = line('.')
+ let l:c = col('.')
+
+ " Build a clang commandline to do code completion on stdin.
+ let l:the_command = shellescape(g:clang_path) .
+ \ " -cc1 -code-completion-at=-:" . l:l . ":" . l:c
+ for l:opt in g:clang_opts
+ let l:the_command .= " " . shellescape(l:opt)
+ endfor
+
+ " Copy the contents of the current buffer into a string for stdin.
+ " TODO: The extra space at the end is for working around clang's
+ " apparent inability to do code completion at the very end of the
+ " input.
+ " TODO: Is it better to feed clang the entire file instead of truncating
+ " it at the current line?
+ let l:process_input = join(getline(1, l:l), "\n") . " "
+
+ " Run it!
+ let l:input_lines = split(system(l:the_command, l:process_input), "\n")
+
+ " Parse the output.
+ for l:input_line in l:input_lines
+ " Vim's substring operator is annoyingly inconsistent with python's.
+ if l:input_line[:11] == 'COMPLETION: '
+ let l:value = l:input_line[12:]
+
+ " Chop off anything after " : ", if present, and move it to the menu.
+ let l:menu = ""
+ let l:spacecolonspace = stridx(l:value, " : ")
+ if l:spacecolonspace != -1
+ let l:menu = l:value[l:spacecolonspace+3:]
+ let l:value = l:value[:l:spacecolonspace-1]
+ endif
+
+ " Chop off " (Hidden)", if present, and move it to the menu.
+ let l:hidden = stridx(l:value, " (Hidden)")
+ if l:hidden != -1
+ let l:menu .= " (Hidden)"
+ let l:value = l:value[:l:hidden-1]
+ endif
+
+ " Handle "Pattern". TODO: Make clang less weird.
+ if l:value == "Pattern"
+ let l:value = l:menu
+ let l:pound = stridx(l:value, "#")
+ " Truncate the at the first [#, <#, or {#.
+ if l:pound != -1
+ let l:value = l:value[:l:pound-2]
+ endif
+ endif
+
+ " Filter out results which don't match the base string.
+ if a:base != ""
+ if l:value[:strlen(a:base)-1] != a:base
+ continue
+ end
+ endif
+
+ " TODO: Don't dump the raw input into info, though it's nice for now.
+ " TODO: The kind string?
+ let l:item = {
+ \ "word": l:value,
+ \ "menu": l:menu,
+ \ "info": l:input_line,
+ \ "dup": 1 }
+
+ " Report a result.
+ if complete_add(l:item) == 0
+ return []
+ endif
+ if complete_check()
+ return []
+ endif
+
+ elseif l:input_line[:9] == "OVERLOAD: "
+ " An overload candidate. Use a crazy hack to get vim to
+ " display the results. TODO: Make this better.
+ let l:value = l:input_line[10:]
+ let l:item = {
+ \ "word": " ",
+ \ "menu": l:value,
+ \ "info": l:input_line,
+ \ "dup": 1}
+
+ " Report a result.
+ if complete_add(l:item) == 0
+ return []
+ endif
+ if complete_check()
+ return []
+ endif
+
+ endif
+ endfor
+
+
+ return []
+endfunction ClangComplete
+
+" Uncomment this to enable the highly-broken autocompletion support.
+"set omnifunc=ClangComplete